diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-08-23 21:39:20 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-08-23 21:39:20 +0200 |
commit | bcf988aff1316d675b4353549197662e6f5d7b17 (patch) | |
tree | ff62d2bcbd16464d1cba5aee4a1f80ea5d3d828b /sys/src/9/bcm64 | |
parent | e6d22570a84ce1e158f184a8a90cfd53be48bbf4 (diff) |
bcm64: deal with discontinuous memory regions, avoid virtual memory aliasing, implement vmap() proper
on the 2GB and 4GB raspberry pi 4 variants, there are two
memory regions for ram:
[0x00000000..0x3e600000)
[0x40000000..0xfc000000)
the framebuffer is somewhere at the end of the first
GB of memory.
to handle these, we append the region base and limit
of the second region to *maxmem= like:
*maxmem=0x3e600000 0x40000000 0xfc000000
the mmu code has been changed to have non-existing
ram unmapped and mmukmap() now uses small 64K pages
instead of 512MB pages to avoid aliasing (framebuffer).
the VIRTPCI mapping has been removed as we now have
a proper vmap() implementation which assigns virtual
addresses automatically.
Diffstat (limited to 'sys/src/9/bcm64')
-rw-r--r-- | sys/src/9/bcm64/dat.h | 2 | ||||
-rw-r--r-- | sys/src/9/bcm64/fns.h | 1 | ||||
-rw-r--r-- | sys/src/9/bcm64/main.c | 39 | ||||
-rw-r--r-- | sys/src/9/bcm64/mem.h | 8 | ||||
-rw-r--r-- | sys/src/9/bcm64/mmu.c | 228 |
5 files changed, 176 insertions, 102 deletions
diff --git a/sys/src/9/bcm64/dat.h b/sys/src/9/bcm64/dat.h index 8f1b923c3..e4aa6fd37 100644 --- a/sys/src/9/bcm64/dat.h +++ b/sys/src/9/bcm64/dat.h @@ -98,7 +98,7 @@ struct Conf { ulong nmach; /* processors */ ulong nproc; /* processes */ - Confmem mem[1]; /* physical memory */ + Confmem mem[4]; /* physical memory */ ulong npage; /* total physical pages of memory */ usize upages; /* user page pool */ ulong copymode; /* 0 is copy on write, 1 is copy on reference */ diff --git a/sys/src/9/bcm64/fns.h b/sys/src/9/bcm64/fns.h index 5ace48713..2b665eafe 100644 --- a/sys/src/9/bcm64/fns.h +++ b/sys/src/9/bcm64/fns.h @@ -75,6 +75,7 @@ extern void mmu0init(uintptr*); extern void mmu0clear(uintptr*); extern void mmuidmap(uintptr*); extern void mmu1init(void); +extern void meminit(void); extern void putasid(Proc*); diff --git a/sys/src/9/bcm64/main.c b/sys/src/9/bcm64/main.c index cb553f397..c76f388c8 100644 --- a/sys/src/9/bcm64/main.c +++ b/sys/src/9/bcm64/main.c @@ -132,10 +132,10 @@ userinit(void) void confinit(void) { - int i, userpcnt; - ulong kpages, memsize = 0; - uintptr pa; + int userpcnt; + ulong kpages; char *p; + int i; if(p = getconf("service")){ if(strcmp(p, "cpu") == 0) @@ -149,37 +149,13 @@ confinit(void) else userpcnt = 0; - if(p = getconf("*maxmem")) - memsize = strtoul(p, 0, 0) - PHYSDRAM; - if (memsize < 512*MB) /* sanity */ - memsize = 512*MB; - getramsize(&conf.mem[0]); - if(conf.mem[0].limit == 0){ - conf.mem[0].base = PHYSDRAM; - conf.mem[0].limit = PHYSDRAM + memsize; - }else if(p != nil) - conf.mem[0].limit = conf.mem[0].base + memsize; - if (conf.mem[0].limit > PHYSDRAM + soc.dramsize) - conf.mem[0].limit = PHYSDRAM + soc.dramsize; + if(userpcnt < 10) + userpcnt = 60 + cpuserver*10; conf.npage = 0; - pa = PADDR(PGROUND((uintptr)end)); - - /* - * we assume that the kernel is at the beginning of one of the - * contiguous chunks of memory and fits therein. 
- */ - for(i=0; i<nelem(conf.mem); i++){ - /* take kernel out of allocatable space */ - if(pa > conf.mem[i].base && pa < conf.mem[i].limit) - conf.mem[i].base = pa; - - conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG; + for(i = 0; i < nelem(conf.mem); i++) conf.npage += conf.mem[i].npage; - } - if(userpcnt < 10) - userpcnt = 60 + cpuserver*10; kpages = conf.npage - (conf.npage*userpcnt)/100; /* @@ -278,19 +254,20 @@ main(uintptr arg0) } quotefmtinstall(); bootargsinit(arg0); + meminit(); confinit(); xinit(); printinit(); uartconsinit(); screeninit(); print("\nPlan 9\n"); - xsummary(); /* set clock rate to arm_freq from config.txt */ setclkrate(ClkArm, 0); trapinit(); fpuinit(); + vgpinit(); clockinit(); cpuidprint(); timersinit(); diff --git a/sys/src/9/bcm64/mem.h b/sys/src/9/bcm64/mem.h index 183cb8285..ba8bef451 100644 --- a/sys/src/9/bcm64/mem.h +++ b/sys/src/9/bcm64/mem.h @@ -41,16 +41,18 @@ #define KSEG0 (0xFFFFFFFE00000000ULL) #define KMAP (0xFFFFFFFE00000000ULL) -#define FRAMEBUFFER (0xFFFFFFFF00000000ULL|PTEWT) -#define VGPIO 0 /* virtual gpio for pi3 ACT LED */ -#define VIRTPCI (0xFFFFFFFF80000000ULL) /* virtual pcie mmio */ +#define FRAMEBUFFER (0xFFFFFFFFA0000000ULL|PTEWT) + +#define VMAP (0xFFFFFFFFB0000000ULL) #define VIRTIO2 (0xFFFFFFFFBC000000ULL) /* 0x7C000000 - 0xFC000000 */ #define VIRTIO1 (0xFFFFFFFFBD000000ULL) /* 0x7D000000 - 0xFD000000 */ #define VIRTIO (0xFFFFFFFFBE000000ULL) /* 0x7E000000 0x3F000000 0xFE000000 */ #define ARMLOCAL (0xFFFFFFFFBF800000ULL) /* - 0x40000000 0xFF800000 */ +#define VGPIO (0xFFFFFFFFBF900000ULL|PTEUNCACHED) /* virtual gpio for pi3 ACT LED */ + #define KZERO (0xFFFFFFFFC0000000ULL) /* kernel address space */ #define SPINTABLE (KZERO+0xd8) diff --git a/sys/src/9/bcm64/mmu.c b/sys/src/9/bcm64/mmu.c index 78ff77eac..2fbe19747 100644 --- a/sys/src/9/bcm64/mmu.c +++ b/sys/src/9/bcm64/mmu.c @@ -12,45 +12,11 @@ mmu0init(uintptr *l1) /* KZERO */ attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | 
PTESH(SHARE_INNER); - pe = PHYSDRAM + soc.dramsize; - if(pe > (uintptr)-KZERO) - pe = (uintptr)-KZERO; + pe = -KZERO; for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ - if(pe - pa < PGLSZ(1)){ - l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; - l1[PTL1X(pa, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; - for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0)) - l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr; - break; - } l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr; } - if(PTLEVELS > 2) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) - l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; - if(PTLEVELS > 3) - for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) - l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE; - - /* KMAP */ - attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_INNER); - pe = PHYSDRAM + soc.dramsize; - for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){ - if(pe - pa < PGLSZ(1)){ - l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE; - for(; pa < pe; pa += PGLSZ(0), va += PGLSZ(0)) - l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr; - break; - } - l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; - } - if(PTLEVELS > 2) - for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(2), va += PGLSZ(2)) - l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; - if(PTLEVELS > 3) - for(pa = PHYSDRAM, va = KMAP; pa < pe; pa += PGLSZ(3), va += PGLSZ(3)) - l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE; /* VIRTIO */ attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE; @@ -82,14 +48,6 @@ mmu0init(uintptr *l1) l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; } - /* VIRTPCI */ - if(soc.pciwin){ - attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE; - pe = 
soc.pciwin + 512*MB; - for(pa = soc.pciwin, va = VIRTPCI; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) - l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr; - } - if(PTLEVELS > 2) for(va = KSEG0; va != 0; va += PGLSZ(2)) l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE; @@ -103,9 +61,7 @@ mmu0clear(uintptr *l1) { uintptr va, pa, pe; - pe = PHYSDRAM + soc.dramsize; - if(pe > (uintptr)-KZERO) - pe = (uintptr)-KZERO; + pe = -KZERO; for(pa = PHYSDRAM, va = KZERO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)) if(PTL1X(pa, 1) != PTL1X(va, 1)) l1[PTL1X(pa, 1)] = 0; @@ -201,40 +157,178 @@ kmapinval(void) { } +#define INITMAP (ROUND((uintptr)end + BY2PG, PGLSZ(1))-KZERO) + +static void* +rampage(void) +{ + uintptr pa; + + if(conf.npage) + return mallocalign(BY2PG, BY2PG, 0, 0); + + pa = conf.mem[0].base; + assert((pa % BY2PG) == 0); + assert(pa < INITMAP); + conf.mem[0].base += BY2PG; + return KADDR(pa); +} + +static void +l1map(uintptr va, uintptr pa, uintptr pe, uintptr attr) +{ + uintptr *l1, *l0; + + assert(pa < pe); + + va &= -BY2PG; + pa &= -BY2PG; + pe = PGROUND(pe); + + attr |= PTEKERNEL | PTEAF; + + l1 = (uintptr*)L1; + + while(pa < pe){ + if(l1[PTL1X(va, 1)] == 0 && (pe-pa) >= PGLSZ(1) && ((va|pa) & PGLSZ(1)-1) == 0){ + l1[PTL1X(va, 1)] = PTEVALID | PTEBLOCK | pa | attr; + va += PGLSZ(1); + pa += PGLSZ(1); + continue; + } + if(l1[PTL1X(va, 1)] & PTEVALID) { + assert((l1[PTL1X(va, 1)] & PTETABLE) == PTETABLE); + l0 = KADDR(l1[PTL1X(va, 1)] & -PGLSZ(0)); + } else { + l0 = rampage(); + memset(l0, 0, BY2PG); + l1[PTL1X(va, 1)] = PTEVALID | PTETABLE | PADDR(l0); + } + assert(l0[PTLX(va, 0)] == 0); + l0[PTLX(va, 0)] = PTEVALID | PTEPAGE | pa | attr; + va += BY2PG; + pa += BY2PG; + } +} + +static void +kmapram(uintptr base, uintptr limit) +{ + if(base < (uintptr)-KZERO && limit > (uintptr)-KZERO){ + kmapram(base, (uintptr)-KZERO); + kmapram((uintptr)-KZERO, limit); + return; + } + if(base < INITMAP) + base = INITMAP; + if(base >= limit || limit <= INITMAP) + 
return; + + l1map((uintptr)kmapaddr(base), base, limit, + PTEWRITE | PTEPXN | PTEUXN | PTESH(SHARE_INNER)); +} + +void +meminit(void) +{ + uvlong memsize = 0; + uintptr pa, va; + char *p, *e; + int i; + + if(p = getconf("*maxmem")){ + memsize = strtoull(p, &e, 0) - PHYSDRAM; + for(i = 1; i < nelem(conf.mem); i++){ + if(e <= p || *e != ' ') + break; + p = ++e; + conf.mem[i].base = strtoull(p, &e, 0); + if(e <= p || *e != ' ') + break; + p = ++e; + conf.mem[i].limit = strtoull(p, &e, 0); + } + } + + if (memsize < INITMAP) /* sanity */ + memsize = INITMAP; + + getramsize(&conf.mem[0]); + if(conf.mem[0].limit == 0){ + conf.mem[0].base = PHYSDRAM; + conf.mem[0].limit = PHYSDRAM + memsize; + }else if(p != nil) + conf.mem[0].limit = conf.mem[0].base + memsize; + + /* + * now we know the real memory regions, unmap + * everything above INITMAP and map again with + * the proper sizes. + */ + coherence(); + for(va = INITMAP+KZERO; va != 0; va += PGLSZ(1)){ + pa = va-KZERO; + ((uintptr*)L1)[PTL1X(pa, 1)] = 0; + ((uintptr*)L1)[PTL1X(va, 1)] = 0; + } + flushtlb(); + + pa = PGROUND((uintptr)end)-KZERO; + for(i=0; i<nelem(conf.mem); i++){ + if(conf.mem[i].limit <= conf.mem[i].base + || conf.mem[i].base >= PHYSDRAM + soc.dramsize){ + conf.mem[i].base = conf.mem[i].limit = 0; + continue; + } + if(conf.mem[i].limit > PHYSDRAM + soc.dramsize) + conf.mem[i].limit = PHYSDRAM + soc.dramsize; + + /* take kernel out of allocatable space */ + if(pa > conf.mem[i].base && pa < conf.mem[i].limit) + conf.mem[i].base = pa; + + kmapram(conf.mem[i].base, conf.mem[i].limit); + } + flushtlb(); + + /* rampage() is now done, count up the pages for each bank */ + for(i=0; i<nelem(conf.mem); i++) + conf.mem[i].npage = (conf.mem[i].limit - conf.mem[i].base)/BY2PG; +} + uintptr mmukmap(uintptr va, uintptr pa, usize size) { - uintptr a, pe, off, attr; + uintptr attr, off; if(va == 0) return 0; + off = pa & BY2PG-1; + attr = va & PTEMA(7); - va &= -PGLSZ(1); - off = pa % PGLSZ(1); - a = va + off; - pe = (pa 
+ size + (PGLSZ(1)-1)) & -PGLSZ(1); - pa &= -PGLSZ(1); - while(pa < pe){ - ((uintptr*)L1)[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | PTEWRITE | PTEAF - | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | attr; - pa += PGLSZ(1); - va += PGLSZ(1); - } + attr |= PTEWRITE | PTEUXN | PTEPXN | PTESH(SHARE_OUTER); + + va &= -BY2PG; + pa &= -BY2PG; + + l1map(va, pa, pa + off + size, attr); flushtlb(); - return a; + + return va + off; } void* -vmap(uintptr pa, int) +vmap(uintptr pa, int size) { - if(soc.pciwin && pa >= soc.pciwin) - return (void*)(VIRTPCI + (pa - soc.pciwin)); - if(soc.armlocal && pa >= soc.armlocal) - return (void*)(ARMLOCAL + (pa - soc.armlocal)); - if(soc.physio && pa >= soc.physio) - return (void*)(soc.virtio + (pa - soc.physio)); - return nil; + static uintptr base = VMAP; + uintptr pe = pa + size; + uintptr va; + + va = base; + base += PGROUND(pe) - (pa & -BY2PG); + + return (void*)mmukmap(va | PTEDEVICE, pa, size); } void |