Commit 2b97620

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner:
 "The x86 updates contain:

   - A fix for a longstanding PAT bug, where PAT was reported on CPUs
     that do not support it, which leads to wrong caching attributes
     and missing MTRR updates

   - Prevent overwriting of the e820 firmware table, which causes kexec
     kernels to lose the fake mptable which is stored there.

   - Cleanup of the UV/BAU code, removing unused code and making local
     functions static"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot/e820: Introduce the bootloader provided e820_table_firmware[] table
  x86/boot/e820: Rename the e820_table_firmware to e820_table_kexec
  x86/boot/e820: Avoid overwriting e820_table_firmware
  x86/mm/pat: Don't report PAT on CPUs that don't support it
  x86/platform/uv/BAU: Minor cleanup, make some local functions static
2 parents 8d97a6c + 12df216 commit 2b97620

7 files changed, 65 insertions(+), 56 deletions(-)

arch/x86/include/asm/e820/api.h

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 #include <asm/e820/types.h>
 
 extern struct e820_table *e820_table;
+extern struct e820_table *e820_table_kexec;
 extern struct e820_table *e820_table_firmware;
 
 extern unsigned long pci_mem_start;

arch/x86/include/asm/pat.h

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@
 bool pat_enabled(void);
 void pat_disable(const char *reason);
 extern void pat_init(void);
+extern void init_cache_modes(void);
 
 extern int reserve_memtype(u64 start, u64 end,
         enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);

arch/x86/kernel/e820.c

Lines changed: 36 additions & 13 deletions
@@ -20,17 +20,27 @@
 #include <asm/setup.h>
 
 /*
- * We organize the E820 table into two main data structures:
+ * We organize the E820 table into three main data structures:
  *
  * - 'e820_table_firmware': the original firmware version passed to us by the
- *   bootloader - not modified by the kernel. We use this to:
+ *   bootloader - not modified by the kernel. It is composed of two parts:
+ *   the first 128 E820 memory entries in boot_params.e820_table and the remaining
+ *   (if any) entries of the SETUP_E820_EXT nodes. We use this to:
  *
  *  - inform the user about the firmware's notion of memory layout
  *    via /sys/firmware/memmap
  *
  *  - the hibernation code uses it to generate a kernel-independent MD5
  *    fingerprint of the physical memory layout of a system.
  *
+ * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version
+ *   passed to us by the bootloader - the major difference between
+ *   e820_table_firmware[] and this one is that, the latter marks the setup_data
+ *   list created by the EFI boot stub as reserved, so that kexec can reuse the
+ *   setup_data information in the second kernel. Besides, e820_table_kexec[]
+ *   might also be modified by the kexec itself to fake a mptable.
+ *   We use this to:
+ *
  *  - kexec, which is a bootloader in disguise, uses the original E820
  *    layout to pass to the kexec-ed kernel. This way the original kernel
  *    can have a restricted E820 map while the kexec()-ed kexec-kernel
@@ -46,9 +56,11 @@
  *   specific memory layout data during early bootup.
  */
 static struct e820_table e820_table_init __initdata;
+static struct e820_table e820_table_kexec_init __initdata;
 static struct e820_table e820_table_firmware_init __initdata;
 
 struct e820_table *e820_table __refdata = &e820_table_init;
+struct e820_table *e820_table_kexec __refdata = &e820_table_kexec_init;
 struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init;
 
 /* For PCI or other memory-mapped resources */
@@ -470,9 +482,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
         return __e820__range_update(e820_table, start, size, old_type, new_type);
 }
 
-static u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
+static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
 {
-        return __e820__range_update(e820_table_firmware, start, size, old_type, new_type);
+        return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
 }
 
 /* Remove a range of memory from the E820 table: */
@@ -546,9 +558,9 @@ void __init e820__update_table_print(void)
         e820__print_table("modified");
 }
 
-static void __init e820__update_table_firmware(void)
+static void __init e820__update_table_kexec(void)
 {
-        e820__update_table(e820_table_firmware);
+        e820__update_table(e820_table_kexec);
 }
 
 #define MAX_GAP_END 0x100000000ull
@@ -623,7 +635,7 @@ __init void e820__setup_pci_gap(void)
 /*
  * Called late during init, in free_initmem().
  *
- * Initial e820_table and e820_table_firmware are largish __initdata arrays.
+ * Initial e820_table and e820_table_kexec are largish __initdata arrays.
  *
  * Copy them to a (usually much smaller) dynamically allocated area that is
  * sized precisely after the number of e820 entries.
@@ -643,6 +655,12 @@ __init void e820__reallocate_tables(void)
         memcpy(n, e820_table, size);
         e820_table = n;
 
+        size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries;
+        n = kmalloc(size, GFP_KERNEL);
+        BUG_ON(!n);
+        memcpy(n, e820_table_kexec, size);
+        e820_table_kexec = n;
+
         size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries;
         n = kmalloc(size, GFP_KERNEL);
         BUG_ON(!n);
@@ -669,6 +687,9 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
         __append_e820_table(extmap, entries);
         e820__update_table(e820_table);
 
+        memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
+        memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+
         early_memunmap(sdata, data_len);
         pr_info("e820: extended physical RAM map:\n");
         e820__print_table("extended");
@@ -727,7 +748,7 @@ core_initcall(e820__register_nvs_regions);
 /*
  * Allocate the requested number of bytes with the requsted alignment
  * and return (the physical address) to the caller. Also register this
- * range in the 'firmware' E820 table as a reserved range.
+ * range in the 'kexec' E820 table as a reserved range.
  *
  * This allows kexec to fake a new mptable, as if it came from the real
  * system.
@@ -738,9 +759,9 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
 
         addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
         if (addr) {
-                e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
-                pr_info("e820: update e820_table_firmware for e820__memblock_alloc_reserved()\n");
-                e820__update_table_firmware();
+                e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+                pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
+                e820__update_table_kexec();
         }
 
         return addr;
@@ -923,13 +944,13 @@ void __init e820__reserve_setup_data(void)
         while (pa_data) {
                 data = early_memremap(pa_data, sizeof(*data));
                 e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+                e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
                 pa_data = data->next;
                 early_memunmap(data, sizeof(*data));
         }
 
         e820__update_table(e820_table);
-
-        memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
+        e820__update_table(e820_table_kexec);
 
         pr_info("extended physical RAM map:\n");
         e820__print_table("reserve setup_data");
@@ -1062,6 +1083,7 @@ void __init e820__reserve_resources(void)
                 res++;
         }
 
+        /* Expose the bootloader-provided memory layout to the sysfs. */
         for (i = 0; i < e820_table_firmware->nr_entries; i++) {
                 struct e820_entry *entry = e820_table_firmware->entries + i;
 
@@ -1175,6 +1197,7 @@ void __init e820__memory_setup(void)
 
         who = x86_init.resources.memory_setup();
 
+        memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
         memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
         pr_info("e820: BIOS-provided physical RAM map:\n");

arch/x86/kernel/kexec-bzimage64.c

Lines changed: 2 additions & 2 deletions
@@ -100,14 +100,14 @@ static int setup_e820_entries(struct boot_params *params)
 {
         unsigned int nr_e820_entries;
 
-        nr_e820_entries = e820_table_firmware->nr_entries;
+        nr_e820_entries = e820_table_kexec->nr_entries;
 
         /* TODO: Pass entries more than E820_MAX_ENTRIES_ZEROPAGE in bootparams setup data */
         if (nr_e820_entries > E820_MAX_ENTRIES_ZEROPAGE)
                 nr_e820_entries = E820_MAX_ENTRIES_ZEROPAGE;
 
         params->e820_entries = nr_e820_entries;
-        memcpy(&params->e820_table, &e820_table_firmware->entries, nr_e820_entries*sizeof(struct e820_entry));
+        memcpy(&params->e820_table, &e820_table_kexec->entries, nr_e820_entries*sizeof(struct e820_entry));
 
         return 0;
 }

arch/x86/kernel/setup.c

Lines changed: 7 additions & 0 deletions
@@ -1075,6 +1075,13 @@ void __init setup_arch(char **cmdline_p)
 
         max_possible_pfn = max_pfn;
 
+        /*
+         * This call is required when the CPU does not support PAT. If
+         * mtrr_bp_init() invoked it already via pat_init() the call has no
+         * effect.
+         */
+        init_cache_modes();
+
         /*
          * Define random base addresses for memory sections after max_pfn is
          * defined and before each memory section base is used.

arch/x86/mm/pat.c

Lines changed: 12 additions & 16 deletions
@@ -37,25 +37,23 @@
 #undef pr_fmt
 #define pr_fmt(fmt) "" fmt
 
-static bool boot_cpu_done;
-
-static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
-static void init_cache_modes(void);
+static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __read_mostly pat_initialized;
+static bool __read_mostly init_cm_done;
 
 void pat_disable(const char *reason)
 {
-        if (!__pat_enabled)
+        if (pat_disabled)
                 return;
 
         if (boot_cpu_done) {
                 WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
                 return;
         }
 
-        __pat_enabled = 0;
+        pat_disabled = true;
         pr_info("x86/PAT: %s\n", reason);
-
-        init_cache_modes();
 }
 
 static int __init nopat(char *str)
@@ -67,7 +65,7 @@ early_param("nopat", nopat);
 
 bool pat_enabled(void)
 {
-        return !!__pat_enabled;
+        return pat_initialized;
 }
 EXPORT_SYMBOL_GPL(pat_enabled);
 
@@ -205,6 +203,8 @@ static void __init_cache_modes(u64 pat)
                 update_cache_mode_entry(i, cache);
         }
         pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
+
+        init_cm_done = true;
 }
 
 #define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
@@ -225,6 +225,7 @@ static void pat_bsp_init(u64 pat)
         }
 
         wrmsrl(MSR_IA32_CR_PAT, pat);
+        pat_initialized = true;
 
         __init_cache_modes(pat);
 }
@@ -242,10 +243,9 @@ static void pat_ap_init(u64 pat)
         wrmsrl(MSR_IA32_CR_PAT, pat);
 }
 
-static void init_cache_modes(void)
+void init_cache_modes(void)
 {
         u64 pat = 0;
-        static int init_cm_done;
 
         if (init_cm_done)
                 return;
@@ -287,8 +287,6 @@ static void init_cache_modes(void)
         }
 
         __init_cache_modes(pat);
-
-        init_cm_done = 1;
 }
 
 /**
@@ -306,10 +304,8 @@ void pat_init(void)
         u64 pat;
         struct cpuinfo_x86 *c = &boot_cpu_data;
 
-        if (!pat_enabled()) {
-                init_cache_modes();
+        if (pat_disabled)
                 return;
-        }
 
         if ((c->x86_vendor == X86_VENDOR_INTEL) &&
             (((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
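
A minimal standalone sketch, not kernel code, of the flag logic this PAT change introduces: pat_enabled() reports true only after the PAT MSR has actually been written, pat_init() bails out when PAT is disabled, and init_cache_modes() is idempotent, which is what makes the new unconditional call in setup_arch() safe. The MSR write and the cache-mode table update are stubbed out here; the flag names mirror the patch.

#include <stdbool.h>
#include <stdio.h>

static bool pat_disabled;     /* set by 'nopat' or when the CPU lacks PAT           */
static bool pat_initialized;  /* set only after the PAT MSR has really been written */
static bool init_cm_done;     /* cache-mode translation computed exactly once       */

/* Stand-in for __init_cache_modes(): would derive the cachemode translation tables. */
static void derive_cache_modes(void)
{
        init_cm_done = true;
}

/* Mirrors the patched pat_init(): return early when PAT is disabled. */
static void pat_init_sketch(void)
{
        if (pat_disabled)
                return;
        /* wrmsrl(MSR_IA32_CR_PAT, pat) would go here */
        pat_initialized = true;
        derive_cache_modes();
}

/* Mirrors the now-exported init_cache_modes(): a no-op the second time around. */
static void init_cache_modes_sketch(void)
{
        if (init_cm_done)
                return;
        derive_cache_modes();
}

/* Mirrors the patched pat_enabled(): true only if the MSR was written. */
static bool pat_enabled_sketch(void)
{
        return pat_initialized;
}

int main(void)
{
        pat_disabled = true;        /* e.g. a CPU without PAT support            */
        pat_init_sketch();          /* early path: returns immediately           */
        init_cache_modes_sketch();  /* late setup_arch() call still fills modes  */
        printf("PAT enabled: %d, cache modes initialized: %d\n",
               pat_enabled_sketch(), init_cm_done);
        return 0;
}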

arch/x86/platform/uv/tlb_uv.c

Lines changed: 6 additions & 25 deletions
@@ -587,32 +587,12 @@ static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc
         return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 }
 
-/*
- * Return whether the status of the descriptor that is normally used for this
- * cpu (the one indexed by its hub-relative cpu number) is busy.
- * The status of the original 32 descriptors is always reflected in the 64
- * bits of UVH_LB_BAU_SB_ACTIVATION_STATUS_0.
- * The bit provided by the activation_status_2 register is irrelevant to
- * the status if it is only being tested for busy or not busy.
- */
-int normal_busy(struct bau_control *bcp)
-{
-        int cpu = bcp->uvhub_cpu;
-        int mmr_offset;
-        int right_shift;
-
-        mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-        right_shift = cpu * UV_ACT_STATUS_SIZE;
-        return (((((read_lmmr(mmr_offset) >> right_shift) &
-                UV_ACT_STATUS_MASK)) << 1) == UV2H_DESC_BUSY);
-}
-
 /*
  * Entered when a bau descriptor has gone into a permanent busy wait because
  * of a hardware bug.
  * Workaround the bug.
  */
-int handle_uv2_busy(struct bau_control *bcp)
+static int handle_uv2_busy(struct bau_control *bcp)
 {
         struct ptc_stats *stat = bcp->statp;
 
@@ -917,8 +897,9 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
-                struct bau_desc *bau_desc)
+static int uv_flush_send_and_wait(struct cpumask *flush_mask,
+                struct bau_control *bcp,
+                struct bau_desc *bau_desc)
 {
         int seq_number = 0;
         int completion_stat = 0;
@@ -1212,8 +1193,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
  * Search the message queue for any 'other' unprocessed message with the
  * same software acknowledge resource bit vector as the 'msg' message.
  */
-struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-                struct bau_control *bcp)
+static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
+                struct bau_control *bcp)
 {
         struct bau_pq_entry *msg_next = msg + 1;
         unsigned char swack_vec = msg->swack_vec;
