grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC] Hacky MTRR support


From: Colin Watson
Subject: [RFC] Hacky MTRR support
Date: Fri, 25 Jun 2010 09:58:34 +0100
User-agent: Mutt/1.5.18 (2008-05-17)

I recently posted ("Subject: [PATCH] Optimise memset on i386" - sorry, I
don't seem to have a route to lists.gnu.org at the moment so I can't
post an archive link) about optimising GRUB's video initialisation, and
hinted that it might be possible to do better by implementing MTRRs as
well in order to allow the system to combine writes to video memory
rather than taking a cache stall for every single write.  I can report
that, at least on the hardware I was using, it does make a significant
difference: filling the screen with solid colour now takes 10
milliseconds rather than 160!  This ended up shaving about a second off
the boot time of the project I'm working on.

On Linux, you can tell whether this is likely to be the case by
comparing the kernel log from startup with /proc/mtrr.  For example, on
my Dell Latitude D830 with BIOS version A02, the kernel log says:

  MTRR variable ranges enabled:
    0 base 000000000 mask F80000000 write-back
    1 base 080000000 mask FC0000000 write-back
    2 base 0BF800000 mask FFF800000 uncachable
    3 base 0BF700000 mask FFFF00000 uncachable
    4 disabled
    5 disabled
    6 disabled
    7 disabled

... while /proc/mtrr says:

  reg00: base=0x000000000 (    0MB), size= 2048MB, count=1: write-back
  reg01: base=0x080000000 ( 2048MB), size= 1024MB, count=1: write-back
  reg02: base=0x0bf800000 ( 3064MB), size=    8MB, count=1: uncachable
  reg03: base=0x0bf700000 ( 3063MB), size=    1MB, count=1: uncachable
  reg04: base=0x0e0000000 ( 3584MB), size=  256MB, count=1: write-combining

The extra write-combining entry there matches the video memory aperture
(you can check this by comparing with /proc/iomem and 'lspci -vvnn'),
and I think it's set up by the X driver.

The following patch is against 1.98, since that's what I was developing
on.  I'm not posting this as a serious merge proposal right now so I
haven't included a ChangeLog or anything; this is just an RFC, and might
be useful for others interested in this kind of thing (Seth Goldberg
commented on IRC that he had been looking into something similar).

Flaws in this approach include:

  * Doesn't work with anything other than the generic Intel MTRR system
    (although modern AMD chips have this too)

  * Very simplistic MTRR handling: anything other than a card with a
    power-of-two memory region aligned on a same-power-of-two boundary
    will degrade to previous behaviour

  * Might break older cards where write-combining isn't permitted (I
    found an Intel paper saying such cards existed); in particular it's
    possible that some cards might put registers in that region

  * Entirely unconfigurable

It's also probably in the wrong place in the tree; I imagine EFI drivers
would want to do much the same thing, for example - and my patch has far
too many magic constants.  I did at least take care to disable any MTRR
added by GRUB before starting the target kernel; who knows what effects
that might have, especially on multiprocessor systems (although Linux
does work around out-of-sync MTRRs across CPUs).

Still, I'd like to know whether this is of general enough interest to
merit polishing it up and maybe offering it as some kind of configurable
option.  I do think that this is a BIOS bug, but it seems to be a not
uncommon one and it does have a pretty noticeable effect on GRUB's video
performance.

diff -Nur -x '*.orig' -x '*~' grub2-1.98/video/i386/pc/vbe.c 
grub2-1.98.new/video/i386/pc/vbe.c
--- grub2-1.98/video/i386/pc/vbe.c      2010-06-24 21:52:06.000000000 +0000
+++ grub2-1.98.new/video/i386/pc/vbe.c  2010-06-24 22:12:05.000000000 +0000
@@ -28,6 +28,7 @@
 #include <grub/misc.h>
 #include <grub/mm.h>
 #include <grub/video.h>
+#include <grub/cpu/tsc.h>
 
 static int vbe_detected = -1;
 
@@ -48,6 +49,7 @@
   grub_uint32_t active_vbe_mode;
   grub_uint8_t *ptr;
   int index_color_mode;
+  int mtrr;
 
   char *offscreen_buffer;
 
@@ -124,6 +126,157 @@
   return GRUB_ERR_NONE;
 }
 
+#define cpuid(num,a,b,c,d) \
+  asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1" \
+                : "=a" (a), "=r" (b), "=c" (c), "=d" (d)  \
+                : "0" (num))
+
+#define rdmsr(num,a,d) \
+  asm volatile ("rdmsr" : "=a" (a), "=d" (d) : "c" (num))
+
+#define wrmsr(num,lo,hi) \
+  asm volatile ("wrmsr" : : "c" (num), "a" (lo), "d" (hi) : "memory")
+
+#define mtrr_base(reg) (0x200 + (reg) * 2)
+#define mtrr_mask(reg) (0x200 + (reg) * 2 + 1)
+
+/* Try to set up a variable-range write-combining MTRR for a memory region.
+   This is best-effort; if it seems too hard, we just accept the performance
+   degradation rather than risking undefined behaviour.  It is intended
+   exclusively to work around BIOS bugs, as the BIOS should already be
+   setting up a suitable MTRR.  */
+static void
+grub_vbe_enable_mtrr (grub_uint8_t *base, grub_size_t size)
+{
+  grub_uint32_t eax, ebx, ecx, edx;
+  grub_uint32_t features;
+  grub_uint32_t mtrrcap;
+  int var_mtrrs;
+  grub_uint32_t max_extended_cpuid;
+  grub_uint32_t maxphyaddr;
+  grub_uint64_t fb_base, fb_size;
+  grub_uint64_t size_bits, fb_mask;
+  grub_uint32_t bits_lo, bits_hi;
+  grub_uint64_t bits;
+  int i, first_unused = -1;
+  grub_uint32_t base_lo, base_hi, mask_lo, mask_hi;
+
+  fb_base = (grub_uint64_t) (grub_size_t) base;
+  fb_size = (grub_uint64_t) size;
+
+  /* Check that fb_base and fb_size can be represented using a single
+     MTRR.  */
+
+  if (fb_base < (1 << 20))
+    return; /* under 1MB, so covered by fixed-range MTRRs */
+  if (fb_base >= (1LL << 36))
+    return; /* over 36 bits, so out of range */
+  if (fb_size < (1 << 12))
+    return; /* variable-range MTRRs must cover at least 4KB */
+
+  size_bits = fb_size;
+  while (size_bits > 1)
+    size_bits >>= 1;
+  if (size_bits != 1)
+    return; /* not a power of two */
+
+  if (fb_base & (fb_size - 1))
+    return; /* not aligned on size boundary */
+
+  fb_mask = ~(fb_size - 1);
+
+  /* Check CPU capabilities.  */
+
+  if (! grub_cpu_is_cpuid_supported ())
+    return;
+
+  cpuid (1, eax, ebx, ecx, edx);
+  features = edx;
+  if (! (features & 0x00001000)) /* MTRR */
+    return;
+
+  rdmsr (0xFE, eax, edx);
+  mtrrcap = eax;
+  if (! (mtrrcap & 0x00000400)) /* write-combining */
+    return;
+  var_mtrrs = (mtrrcap & 0xFF);
+
+  cpuid (0x80000000, eax, ebx, ecx, edx);
+  max_extended_cpuid = eax;
+  if (max_extended_cpuid >= 0x80000008)
+    {
+      cpuid (0x80000008, eax, ebx, ecx, edx);
+      maxphyaddr = (eax & 0xFF);
+    }
+  else
+    maxphyaddr = 36;
+  bits_lo = 0xFFFFF000; /* assume maxphyaddr >= 36 */
+  bits_hi = (1 << (maxphyaddr - 32)) - 1;
+  bits = bits_lo | ((grub_uint64_t) bits_hi << 32);
+
+  /* Check whether an MTRR already covers this region.  If not, take an
+     unused one if possible.  */
+  for (i = 0; i < var_mtrrs; i++)
+    {
+      rdmsr (mtrr_mask (i), eax, edx);
+      mask_lo = eax;
+      mask_hi = edx;
+      if (mask_lo & 0x800) /* valid */
+       {
+         grub_uint64_t real_base, real_mask;
+
+         rdmsr (mtrr_base (i), eax, edx);
+         base_lo = eax;
+         base_hi = edx;
+
+         real_base = ((grub_uint64_t) (base_hi & bits_hi) << 32) |
+                     (base_lo & bits_lo);
+         real_mask = ((grub_uint64_t) (mask_hi & bits_hi) << 32) |
+                      (mask_lo & bits_lo);
+         if (real_base < (fb_base + fb_size) &&
+             real_base + (~real_mask & bits) >= fb_base)
+           return; /* existing MTRR overlaps this region */
+       }
+      else if (first_unused < 0)
+       first_unused = i;
+    }
+
+  if (first_unused < 0)
+    return; /* all MTRRs in use */
+
+  /* Set up the first unused MTRR we found.  */
+  rdmsr (mtrr_base (first_unused), eax, edx);
+  base_lo = eax;
+  base_hi = edx;
+  rdmsr (mtrr_mask (first_unused), eax, edx);
+  mask_lo = eax;
+  mask_hi = edx;
+
+  base_lo = (base_lo & ~bits_lo & ~0xFF) |
+           (fb_base & bits_lo) | 0x01 /* WC */;
+  base_hi = (base_hi & ~bits_hi) | ((fb_base >> 32) & bits_hi);
+  wrmsr (mtrr_base (first_unused), base_lo, base_hi);
+  mask_lo = (mask_lo & ~bits_lo) | (fb_mask & bits_lo) | 0x800 /* valid */;
+  mask_hi = (mask_hi & ~bits_hi) | ((fb_mask >> 32) & bits_hi);
+  wrmsr (mtrr_mask (first_unused), mask_lo, mask_hi);
+
+  framebuffer.mtrr = first_unused;
+}
+
+static void
+grub_vbe_disable_mtrr (int mtrr)
+{
+  grub_uint32_t eax, edx;
+  grub_uint32_t mask_lo, mask_hi;
+
+  rdmsr (mtrr_mask (mtrr), eax, edx);
+  mask_lo = eax;
+  mask_hi = edx;
+
+  mask_lo &= ~0x800 /* valid */;
+  wrmsr (mtrr_mask (mtrr), mask_lo, mask_hi);
+}
+
 grub_err_t
 grub_vbe_set_video_mode (grub_uint32_t vbe_mode,
                         struct grub_vbe_mode_info_block *vbe_mode_info)
@@ -355,6 +508,7 @@
 
   /* Reset frame buffer.  */
   grub_memset (&framebuffer, 0, sizeof(framebuffer));
+  framebuffer.mtrr = -1;
 
   return grub_video_fb_init ();
 }
@@ -378,6 +532,11 @@
 
   err = grub_video_fb_fini ();
   grub_free (framebuffer.offscreen_buffer);
+  if (framebuffer.mtrr >= 0)
+    {
+      grub_vbe_disable_mtrr (framebuffer.mtrr);
+      framebuffer.mtrr = -1;
+    }
   return err;
 }
 
@@ -684,6 +843,9 @@
 
       framebuffer.mode_info.blit_format = grub_video_get_blit_format 
(&framebuffer.mode_info);
 
+      grub_vbe_enable_mtrr (framebuffer.ptr,
+                           controller_info.total_memory << 16);
+
       /* Set up double buffering and targets.  */
       err = double_buffering_init (mode_type, mode_mask);
       if (err)
@@ -774,6 +936,11 @@
 
   grub_video_fb_fini ();
   grub_free (framebuffer.offscreen_buffer);
+  if (framebuffer.mtrr >= 0)
+    {
+      grub_vbe_disable_mtrr (framebuffer.mtrr);
+      framebuffer.mtrr = -1;
+    }
 
   return GRUB_ERR_NONE;
 }

Thanks,

-- 
Colin Watson                                       address@hidden



reply via email to

[Prev in Thread] Current Thread [Next in Thread]