diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2018-10-20 19:56:31 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2018-10-20 19:56:31 +0200 |
commit | 83e20b4df18d539db59c8e1090f77a6565df250e (patch) | |
tree | d42f2d4c7fdd8cb1526131515690bc9229150505 | |
parent | 796e5e6000677a39577d545e4603ce251e7cbfe9 (diff) |
bcm: import changes for raspi2/3 from richard miller
30 files changed, 2458 insertions, 784 deletions
diff --git a/sys/src/9/bcm/archbcm.c b/sys/src/9/bcm/archbcm.c index eb9c4c2d8..443a9b709 100644 --- a/sys/src/9/bcm/archbcm.c +++ b/sys/src/9/bcm/archbcm.c @@ -1,5 +1,5 @@ /* - * bcm2835 (e.g. raspberry pi) architecture-specific stuff + * bcm2835 (e.g. original raspberry pi) architecture-specific stuff */ #include "u.h" @@ -13,8 +13,19 @@ #define POWERREGS (VIRTIO+0x100000) +Soc soc = { + .dramsize = 512*MiB, + .physio = 0x20000000, + .busdram = 0x40000000, + .busio = 0x7E000000, + .armlocal = 0, + .l1ptedramattrs = Cached | Buffered, + .l2ptedramattrs = Cached | Buffered, +}; + enum { Wdogfreq = 65536, + Wdogtime = 10, /* seconds, ≤ 15 */ }; /* @@ -25,6 +36,7 @@ enum { Password = 0x5A<<24, CfgMask = 0x03<<4, CfgReset = 0x02<<4, + Rsts = 0x20>>2, Wdog = 0x24>>2, }; @@ -48,13 +60,68 @@ archreboot(void) } void +wdogfeed(void) +{ + u32int *r; + + r = (u32int*)POWERREGS; + r[Wdog] = Password | (Wdogtime * Wdogfreq); + r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset; +} + +void +wdogoff(void) +{ + u32int *r; + + r = (u32int*)POWERREGS; + r[Rstc] = Password | (r[Rstc] & ~CfgMask); +} + +char * +cputype2name(char *buf, int size) +{ + seprint(buf, buf + size, "1176JZF-S"); + return buf; +} + +void cpuidprint(void) { - print("cpu%d: %dMHz ARM1176JZF-S\n", m->machno, m->cpumhz); + char name[64]; + + cputype2name(name, sizeof name); + delay(50); /* let uart catch up */ + print("cpu%d: %dMHz ARM %s\n", m->machno, m->cpumhz, name); +} + +int +getncpus(void) +{ + return 1; +} + +int +startcpus(uint) +{ + return 1; } void archbcmlink(void) { + addclock0link(wdogfeed, HZ); +} + +int +l2ap(int ap) +{ + return (AP(3, (ap))|AP(2, (ap))|AP(1, (ap))|AP(0, (ap))); +} + +int +cmpswap(long *addr, long old, long new) +{ + return cas32(addr, old, new); } diff --git a/sys/src/9/bcm/archbcm2.c b/sys/src/9/bcm/archbcm2.c new file mode 100644 index 000000000..03836ca51 --- /dev/null +++ b/sys/src/9/bcm/archbcm2.c @@ -0,0 +1,248 @@ +/* + * bcm2836 (e.g.raspberry pi 2) architecture-specific 
stuff + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "io.h" +#include "arm.h" + +#include "../port/netif.h" + +typedef struct Mbox Mbox; +typedef struct Mboxes Mboxes; + +#define POWERREGS (VIRTIO+0x100000) + +Soc soc = { + .dramsize = 0x3F000000, /* was 1024*MiB, but overlaps with physio */ + .physio = 0x3F000000, + .busdram = 0xC0000000, + .busio = 0x7E000000, + .armlocal = 0x40000000, + .l1ptedramattrs = Cached | Buffered | L1wralloc | L1sharable, + .l2ptedramattrs = Cached | Buffered | L2wralloc | L2sharable, +}; + +enum { + Wdogfreq = 65536, + Wdogtime = 10, /* seconds, ≤ 15 */ +}; + +/* + * Power management / watchdog registers + */ +enum { + Rstc = 0x1c>>2, + Password = 0x5A<<24, + CfgMask = 0x03<<4, + CfgReset = 0x02<<4, + Rsts = 0x20>>2, + Wdog = 0x24>>2, +}; + +/* + * Arm local regs for smp + */ +struct Mbox { + u32int doorbell; + u32int mbox1; + u32int mbox2; + u32int startcpu; +}; +struct Mboxes { + Mbox set[4]; + Mbox clr[4]; +}; + +enum { + Mboxregs = 0x80 +}; + +static Lock startlock[MAXMACH + 1]; + +void +archreset(void) +{ + fpon(); +} + +void +archreboot(void) +{ + u32int *r; + + r = (u32int*)POWERREGS; + r[Wdog] = Password | 1; + r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset; + coherence(); + for(;;) + ; +} + +void +wdogfeed(void) +{ + u32int *r; + + r = (u32int*)POWERREGS; + r[Wdog] = Password | (Wdogtime * Wdogfreq); + r[Rstc] = Password | (r[Rstc] & ~CfgMask) | CfgReset; +} + +void +wdogoff(void) +{ + u32int *r; + + r = (u32int*)POWERREGS; + r[Rstc] = Password | (r[Rstc] & ~CfgMask); +} + + +char * +cputype2name(char *buf, int size) +{ + u32int r; + uint part; + char *p; + + r = cpidget(); /* main id register */ + assert((r >> 24) == 'A'); + part = (r >> 4) & MASK(12); + switch(part){ + case 0xc07: + p = seprint(buf, buf + size, "Cortex-A7"); + break; + case 0xd03: + p = seprint(buf, buf + size, "Cortex-A53"); + break; + default: + p = 
seprint(buf, buf + size, "Unknown-%#x", part); + break; + } + seprint(p, buf + size, " r%ldp%ld", + (r >> 20) & MASK(4), r & MASK(4)); + return buf; +} + +void +cpuidprint(void) +{ + char name[64]; + + cputype2name(name, sizeof name); + delay(50); /* let uart catch up */ + print("cpu%d: %dMHz ARM %s\n", m->machno, m->cpumhz, name); +} + +int +getncpus(void) +{ + int n, max; + char *p; + + n = 4; + if(n > MAXMACH) + n = MAXMACH; + p = getconf("*ncpu"); + if(p && (max = atoi(p)) > 0 && n > max) + n = max; + return n; +} + +static int +startcpu(uint cpu) +{ + Mboxes *mb; + int i; + void cpureset(); + + mb = (Mboxes*)(ARMLOCAL + Mboxregs); + if(mb->clr[cpu].startcpu) + return -1; + mb->set[cpu].startcpu = PADDR(cpureset); + coherence(); + sev(); + for(i = 0; i < 1000; i++) + if(mb->clr[cpu].startcpu == 0) + return 0; + mb->clr[cpu].startcpu = PADDR(cpureset); + mb->set[cpu].doorbell = 1; + return 0; +} + +void +mboxclear(uint cpu) +{ + Mboxes *mb; + + mb = (Mboxes*)(ARMLOCAL + Mboxregs); + mb->clr[cpu].mbox1 = 1; +} + +void +wakecpu(uint cpu) +{ + Mboxes *mb; + + mb = (Mboxes*)(ARMLOCAL + Mboxregs); + mb->set[cpu].mbox1 = 1; +} + +int +startcpus(uint ncpu) +{ + int i, timeout; + + for(i = 0; i < ncpu; i++) + lock(&startlock[i]); + cachedwbse(startlock, sizeof startlock); + for(i = 1; i < ncpu; i++){ + if(startcpu(i) < 0) + return i; + timeout = 10000000; + while(!canlock(&startlock[i])) + if(--timeout == 0) + return i; + unlock(&startlock[i]); + } + return ncpu; +} + +void +archbcm2link(void) +{ + addclock0link(wdogfeed, HZ); +} + +int +l2ap(int ap) +{ + return (AP(0, (ap))); +} + +void +cpustart(int cpu) +{ + Mboxes *mb; + + up = nil; + machinit(); + mb = (Mboxes*)(ARMLOCAL + Mboxregs); + mb->clr[cpu].doorbell = 1; + trapinit(); + clockinit(); + mmuinit1(); + timersinit(); + cpuidprint(); + archreset(); + active.machs[m->machno] = 1; + unlock(&startlock[cpu]); + schedinit(); + panic("schedinit returned"); +} diff --git a/sys/src/9/bcm/arm.h b/sys/src/9/bcm/arm.h index 
534f42d29..f730a3885 100644 --- a/sys/src/9/bcm/arm.h +++ b/sys/src/9/bcm/arm.h @@ -1,5 +1,5 @@ /* - * arm-specific definitions for armv6 + * arm-specific definitions for armv6 (arm11), armv7 (cortex-a8 and -a7) * these are used in C and assembler */ @@ -12,6 +12,7 @@ #define PsrMsvc 0x00000013 /* `protected mode for OS' */ #define PsrMmon 0x00000016 /* `secure monitor' (trustzone hyper) */ #define PsrMabt 0x00000017 +#define PsrMhyp 0x0000001A #define PsrMund 0x0000001B #define PsrMsys 0x0000001F /* `privileged user mode for OS' (trustzone) */ #define PsrMask 0x0000001F @@ -52,9 +53,19 @@ #define CpTLD 10 /* TLB Lockdown, with op2 */ #define CpVECS 12 /* vector bases, op1==0, Crm==0, op2s (cortex) */ #define CpPID 13 /* Process ID */ +#define CpTIMER 14 /* Generic timer (cortex-a7) */ #define CpSPM 15 /* system performance monitor (arm1176) */ /* + * CpTIMER op1==0 Crm and opcode2 registers (cortex-a7) + */ +#define CpTIMERcntfrq 0 +#define CpTIMERphys 2 + +#define CpTIMERphysval 0 +#define CpTIMERphysctl 1 + +/* * CpTTB op1==0, Crm==0 opcode2 values. */ #define CpTTB0 0 @@ -71,6 +82,7 @@ * CpID Secondary (CRm) registers. */ #define CpIDidct 0 +#define CpIDfeat 1 /* * CpID op1==0 opcode2 fields. @@ -80,6 +92,7 @@ #define CpIDct 1 /* cache type */ #define CpIDtlb 3 /* tlb type (cortex) */ #define CpIDmpid 5 /* multiprocessor id (cortex) */ +#define CpIDrevid 6 /* extra revision ID */ /* CpIDid op1 values */ #define CpIDcsize 1 /* cache size (cortex) */ @@ -133,6 +146,10 @@ #define CpACasa (1<<4) /* enable speculative accesses */ #define CpACl1pe (1<<3) /* l1 cache parity enable */ #define CpACl2en (1<<1) /* l2 cache enable; default 1 */ + +/* cortex-a7 and cortex-a9 */ +#define CpACsmp (1<<6) /* SMP l1 caches coherence; needed for ldrex/strex */ +#define CpACl1pctl (3<<13) /* l1 prefetch control */ /* * CpCONTROL Secondary (CRm) registers and opcode2 fields. 
*/ @@ -151,9 +168,9 @@ #define CpCACHEinvd 6 /* data or unified */ #define CpCACHEinvu 7 /* unified (not on cortex) */ #define CpCACHEva2pa 8 /* va -> pa translation (cortex) */ -#define CpCACHEwb 10 /* writeback */ -#define CpCACHEinvdse 11 /* data or unified by mva */ -#define CpCACHEwbi 14 /* writeback+invalidate */ +#define CpCACHEwb 10 /* writeback to PoC */ +#define CpCACHEwbu 11 /* writeback to PoU */ +#define CpCACHEwbi 14 /* writeback+invalidate (to PoC) */ #define CpCACHEall 0 /* entire (not for invd nor wb(i) on cortex) */ #define CpCACHEse 1 /* single entry */ @@ -223,7 +240,7 @@ #define CpVECSmon 1 /* secure monitor base addr */ /* - * CpSPM Secondary (CRm) registers and opcode2 fields. + * CpSPM Secondary (CRm) registers and opcode2 fields (armv6) */ #define CpSPMperf 12 /* various counters */ @@ -239,6 +256,21 @@ #define CpCACHERANGEdwbi 14 /* writeback+invalidate */ /* + * CpTTB cache control bits + */ +#define CpTTBnos (1<<5) /* only Inner cache shareable */ +#define CpTTBinc (0<<0|0<<6) /* inner non-cacheable */ +#define CpTTBiwba (0<<0|1<<6) /* inner write-back write-allocate */ +#define CpTTBiwt (1<<0|0<<6) /* inner write-through */ +#define CpTTBiwb (1<<0|1<<6) /* inner write-back no write-allocate */ +#define CpTTBonc (0<<3) /* outer non-cacheable */ +#define CpTTBowba (1<<3) /* outer write-back write-allocate */ +#define CpTTBowt (2<<3) /* outer write-through */ +#define CpTTBowb (3<<3) /* outer write-back no write-allocate */ +#define CpTTBs (1<<1) /* page table in shareable memory */ +#define CpTTBbase ~0x7F /* mask off control bits */ + +/* * MMU page table entries. * Mbz (0x10) bit is implementation-defined and must be 0 on the cortex. 
*/ @@ -256,6 +288,15 @@ #define Cached 0x00000008 /* L[12] */ #define Dom0 0 +#define L1wralloc (1<<12) /* L1 TEX */ +#define L1sharable (1<<16) +#define L2wralloc (1<<6) /* L2 TEX (small pages) */ +#define L2sharable (1<<10) + +/* attributes for memory containing locks -- differs between armv6 and armv7 */ +//#define L1ptedramattrs (Cached | Buffered | L1wralloc | L1sharable) +//#define L2ptedramattrs (Cached | Buffered | L2wralloc | L2sharable) + #define Noaccess 0 /* AP, DAC */ #define Krw 1 /* AP */ /* armv7 deprecates AP[2] == 1 & AP[1:0] == 2 (Uro), prefers 3 (new in v7) */ @@ -267,7 +308,7 @@ #define F(v, o, w) (((v) & ((1<<(w))-1))<<(o)) #define AP(n, v) F((v), ((n)*2)+4, 2) #define L1AP(ap) (AP(3, (ap))) -#define L2AP(ap) (AP(3, (ap))|AP(2, (ap))|AP(1, (ap))|AP(0, (ap))) /* pre-armv7 */ +/* L2AP differs between armv6 and armv7 -- see l2ap in arch*.c */ #define DAC(n, v) F((v), (n)*2, 2) #define HVECTORS 0xffff0000 diff --git a/sys/src/9/bcm/arm.s b/sys/src/9/bcm/arm.s index 2dbb8778a..661b7136f 100644 --- a/sys/src/9/bcm/arm.s +++ b/sys/src/9/bcm/arm.s @@ -1,5 +1,5 @@ /* - * armv6 machine assist, definitions + * armv6/v7 machine assist, definitions * * loader uses R11 as scratch. 
*/ @@ -11,8 +11,6 @@ #define L1X(va) (((((va))>>20) & 0x0fff)<<2) -#define PTEDRAM (Dom0|L1AP(Krw)|Section|Cached|Buffered) - /* * new instructions */ @@ -25,12 +23,32 @@ MOVW $0, R0; \ MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait -#define BARRIERS ISB; DSB +#define BARRIERS DSB; ISB #define MCRR(coproc, op, rd, rn, crm) \ WORD $(0xec400000|(rn)<<16|(rd)<<12|(coproc)<<8|(op)<<4|(crm)) +#define MRRC(coproc, op, rd, rn, crm) \ + WORD $(0xec500000|(rn)<<16|(rd)<<12|(coproc)<<8|(op)<<4|(crm)) +#define MSR(R, rn, m, m1) \ + WORD $(0xe120f200|(R)<<22|(m1)<<16|(m)<<8|(rn)) + +#define CPSIE WORD $0xf1080080 /* intr enable: zeroes I bit */ +#define CPSID WORD $0xf10c0080 /* intr disable: sets I bit */ #define OKAY \ MOVW $0x7E200028,R2; \ MOVW $0x10000,R3; \ MOVW R3,(R2) + +#define PUTC(s) + +/* + * get cpu id, or zero if armv6 + */ +#define CPUID(r) \ + MRC CpSC, 0, r, C(CpID), C(CpIDfeat), 7; \ + CMP $0, r; \ + B.EQ 2(PC); \ + MRC CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \ + AND.S $(MAXMACH-1), r + diff --git a/sys/src/9/bcm/armv6.s b/sys/src/9/bcm/armv6.s new file mode 100644 index 000000000..68714f924 --- /dev/null +++ b/sys/src/9/bcm/armv6.s @@ -0,0 +1,324 @@ +/* + * Broadcom bcm2835 SoC, as used in Raspberry Pi + * arm1176jzf-s processor (armv6) + */ + +#include "arm.s" + +#define CACHELINESZ 32 + +TEXT armstart(SB), 1, $-4 + + /* + * SVC mode, interrupts disabled + */ + MOVW $(PsrDirq|PsrDfiq|PsrMsvc), R1 + MOVW R1, CPSR + + /* + * disable the mmu and L1 caches + * invalidate caches and tlb + */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BIC $(CpCdcache|CpCicache|CpCpredict|CpCmmu), R1 + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvu), CpCACHEall + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv + ISB + + /* + * clear mach and page tables + */ + MOVW $PADDR(MACHADDR), R1 + MOVW $PADDR(KTZERO), R2 +_ramZ: + MOVW R0, (R1) + ADD $4, R1 + CMP R1, R2 + BNE _ramZ + + /* + * start stack at top of mach 
(physical addr) + * set up page tables for kernel + */ + MOVW $PADDR(MACHADDR+MACHSIZE-4), R13 + MOVW $PADDR(L1), R0 + BL ,mmuinit(SB) + + /* + * set up domain access control and page table base + */ + MOVW $Client, R1 + MCR CpSC, 0, R1, C(CpDAC), C(0) + MOVW $PADDR(L1), R1 + MCR CpSC, 0, R1, C(CpTTB), C(0) + + /* + * enable caches, mmu, and high vectors + */ + MRC CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl + ORR $(CpChv|CpCdcache|CpCicache|CpCpredict|CpCmmu), R0 + MCR CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl + ISB + + /* + * switch SB, SP, and PC into KZERO space + */ + MOVW $setR12(SB), R12 + MOVW $(MACHADDR+MACHSIZE-4), R13 + MOVW $_startpg(SB), R15 + +TEXT _startpg(SB), 1, $-4 + + /* + * enable cycle counter + */ + MOVW $1, R1 + MCR CpSC, 0, R1, C(CpSPM), C(CpSPMperf), CpSPMctl + + /* + * call main and loop forever if it returns + */ + BL ,main(SB) + B ,0(PC) + + BL _div(SB) /* hack to load _div, etc. */ + +TEXT cpidget(SB), 1, $-4 /* main ID */ + MRC CpSC, 0, R0, C(CpID), C(0), CpIDid + RET + +TEXT fsrget(SB), 1, $-4 /* data fault status */ + MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata + RET + +TEXT ifsrget(SB), 1, $-4 /* instruction fault status */ + MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst + RET + +TEXT farget(SB), 1, $-4 /* fault address */ + MRC CpSC, 0, R0, C(CpFAR), C(0x0) + RET + +TEXT lcycles(SB), 1, $-4 + MRC CpSC, 0, R0, C(CpSPM), C(CpSPMperf), CpSPMcyc + RET + +TEXT splhi(SB), 1, $-4 + MOVW $(MACHADDR+4), R2 /* save caller pc in Mach */ + MOVW R14, 0(R2) + + MOVW CPSR, R0 /* turn off irqs (but not fiqs) */ + ORR $(PsrDirq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splfhi(SB), 1, $-4 + MOVW $(MACHADDR+4), R2 /* save caller pc in Mach */ + MOVW R14, 0(R2) + + MOVW CPSR, R0 /* turn off irqs and fiqs */ + ORR $(PsrDirq|PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splflo(SB), 1, $-4 + MOVW CPSR, R0 /* turn on fiqs */ + BIC $(PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT spllo(SB), 1, $-4 + MOVW CPSR, R0 /* turn on irqs and fiqs */ + BIC 
$(PsrDirq|PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splx(SB), 1, $-4 + MOVW $(MACHADDR+0x04), R2 /* save caller pc in Mach */ + MOVW R14, 0(R2) + + MOVW R0, R1 /* reset interrupt level */ + MOVW CPSR, R0 + MOVW R1, CPSR + RET + +TEXT spldone(SB), 1, $0 /* end marker for devkprof.c */ + RET + +TEXT islo(SB), 1, $-4 + MOVW CPSR, R0 + AND $(PsrDirq), R0 + EOR $(PsrDirq), R0 + RET + +TEXT tas(SB), $-4 +TEXT _tas(SB), $-4 + MOVW R0,R1 + MOVW $1,R0 + SWPW R0,(R1) /* fix: deprecated in armv6 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVW R13, 0(R0) /* sp */ + MOVW R14, 4(R0) /* pc */ + MOVW $0, R0 + RET + +TEXT gotolabel(SB), 1, $-4 + MOVW 0(R0), R13 /* sp */ + MOVW 4(R0), R14 /* pc */ + MOVW $1, R0 + RET + +TEXT getcallerpc(SB), 1, $-4 + MOVW 0(R13), R0 + RET + +TEXT idlehands(SB), $-4 + MOVW CPSR, R3 + ORR $(PsrDirq|PsrDfiq), R3, R1 /* splfhi */ + MOVW R1, CPSR + + DSB + MOVW nrdy(SB), R0 + CMP $0, R0 + MCR.EQ CpSC, 0, R0, C(CpCACHE), C(CpCACHEintr), CpCACHEwait + DSB + + MOVW R3, CPSR /* splx */ + RET + + +TEXT coherence(SB), $-4 + BARRIERS + RET + +/* + * invalidate tlb + */ +TEXT mmuinvalidate(SB), 1, $-4 + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv + BARRIERS + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc + RET + +/* + * mmuinvalidateaddr(va) + * invalidate tlb entry for virtual page address va, ASID 0 + */ +TEXT mmuinvalidateaddr(SB), 1, $-4 + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse + BARRIERS + RET + +/* + * drain write buffer + * writeback data cache + */ +TEXT cachedwb(SB), 1, $-4 + DSB + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEall + RET + +/* + * drain write buffer + * writeback and invalidate data cache + */ +TEXT cachedwbinv(SB), 1, $-4 + DSB + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall + RET + +/* + * cachedwbinvse(va, n) + * drain write buffer + * writeback and invalidate data cache range [va, va+n) + */ +TEXT cachedwbinvse(SB), 1, $-4 + MOVW R0, R1 /* DSB 
clears R0 */ + DSB + MOVW n+4(FP), R2 + ADD R1, R2 + SUB $1, R2 + BIC $(CACHELINESZ-1), R1 + BIC $(CACHELINESZ-1), R2 + MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwbi) + RET + +/* + * cachedwbse(va, n) + * drain write buffer + * writeback data cache range [va, va+n) + */ +TEXT cachedwbtlb(SB), 1, $-4 +TEXT cachedwbse(SB), 1, $-4 + + MOVW R0, R1 /* DSB clears R0 */ + DSB + MOVW n+4(FP), R2 + ADD R1, R2 + BIC $(CACHELINESZ-1), R1 + BIC $(CACHELINESZ-1), R2 + MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwb) + RET + +/* + * cachedinvse(va, n) + * drain write buffer + * invalidate data cache range [va, va+n) + */ +TEXT cachedinvse(SB), 1, $-4 + MOVW R0, R1 /* DSB clears R0 */ + DSB + MOVW n+4(FP), R2 + ADD R1, R2 + SUB $1, R2 + BIC $(CACHELINESZ-1), R1 + BIC $(CACHELINESZ-1), R2 + MCRR(CpSC, 0, 2, 1, CpCACHERANGEinvd) + RET + +/* + * drain write buffer and prefetch buffer + * writeback and invalidate data cache + * invalidate instruction cache + */ +TEXT cacheuwbinv(SB), 1, $-4 + BARRIERS + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall + RET + +/* + * L2 cache is not enabled + */ +TEXT l2cacheuwbinv(SB), 1, $-4 + RET + +/* + * invalidate instruction cache + */ +TEXT cacheiinv(SB), 1, $-4 + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall + RET + +/* + * invalidate range of instruction cache + */ +TEXT cacheiinvse(SB), 1, $-4 + MOVW R0, R1 /* DSB clears R0 */ + DSB + MOVW n+4(FP), R2 + ADD R1, R2 + SUB $1, R2 + MCRR(CpSC, 0, 2, 1, CpCACHERANGEinvi) + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc + DSB + ISB + RET diff --git a/sys/src/9/bcm/armv7.s b/sys/src/9/bcm/armv7.s new file mode 100644 index 000000000..27f2e49c8 --- /dev/null +++ b/sys/src/9/bcm/armv7.s @@ -0,0 +1,510 @@ +/* + * Broadcom bcm2836 SoC, as used in Raspberry Pi 2 + * 4 x Cortex-A7 processor (armv7) + */ + +#include "arm.s" + +#define CACHELINESZ 64 +#define ICACHELINESZ 32 + +#undef DSB +#undef DMB +#undef 
ISB +#define DSB WORD $0xf57ff04f /* data synch. barrier; last f = SY */ +#define DMB WORD $0xf57ff05f /* data mem. barrier; last f = SY */ +#define ISB WORD $0xf57ff06f /* instr. sync. barrier; last f = SY */ +#define WFI WORD $0xe320f003 /* wait for interrupt */ +#define WFI_EQ WORD $0x0320f003 /* wait for interrupt if eq */ +#define ERET WORD $0xe160006e /* exception return from HYP */ +#define SEV WORD $0xe320f004 /* send event */ + +/* tas/cas strex debugging limits; started at 10000 */ +#define MAXSC 1000000 + +TEXT armstart(SB), 1, $-4 + + /* + * if not cpu0, go to secondary startup + */ + CPUID(R1) + BNE reset + + /* + * go to SVC mode, interrupts disabled + */ + BL svcmode(SB) + + /* + * disable the mmu and caches + */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BIC $(CpCdcache|CpCicache|CpCmmu), R1 + ORR $(CpCsbo|CpCsw), R1 + BIC $CpCsbz, R1 + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BARRIERS + + /* + * clear mach and page tables + */ + MOVW $PADDR(MACHADDR), R1 + MOVW $PADDR(KTZERO), R2 +_ramZ: + MOVW R0, (R1) + ADD $4, R1 + CMP R1, R2 + BNE _ramZ + + /* + * turn SMP on + * invalidate tlb + */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl + ORR $CpACsmp, R1 /* turn SMP on */ + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl + BARRIERS + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv + BARRIERS + + /* + * start stack at top of mach (physical addr) + * set up page tables for kernel + */ + MOVW $PADDR(MACHADDR+MACHSIZE-4), R13 + MOVW $PADDR(L1), R0 + BL mmuinit(SB) + + /* + * set up domain access control and page table base + */ + MOVW $Client, R1 + MCR CpSC, 0, R1, C(CpDAC), C(0) + MOVW $PADDR(L1), R1 + ORR $(CpTTBs|CpTTBowba|CpTTBiwba), R1 + MCR CpSC, 0, R1, C(CpTTB), C(0) + MCR CpSC, 0, R1, C(CpTTB), C(0), CpTTB1 /* cortex has two */ + + /* + * invalidate my caches before enabling + */ + BL cachedinv(SB) + BL cacheiinv(SB) + BL l2cacheuinv(SB) + BARRIERS + + /* + * enable caches, mmu, and high vectors + */ + + MRC CpSC, 0, R0, 
C(CpCONTROL), C(0), CpMainctl + ORR $(CpChv|CpCdcache|CpCicache|CpCmmu), R0 + MCR CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl + BARRIERS + + /* + * switch SB, SP, and PC into KZERO space + */ + MOVW $setR12(SB), R12 + MOVW $(MACHADDR+MACHSIZE-4), R13 + MOVW $_startpg(SB), R15 + +TEXT _startpg(SB), 1, $-4 + + /* + * enable cycle counter + */ + MOVW $(1<<31), R1 + MCR CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenacyc + MOVW $1, R1 + MCR CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenapmnc + + /* + * call main and loop forever if it returns + */ + BL ,main(SB) + B ,0(PC) + + BL _div(SB) /* hack to load _div, etc. */ + +/* + * startup entry for cpu(s) other than 0 + */ +TEXT cpureset(SB), 1, $-4 +reset: + /* + * load physical base for SB addressing while mmu is off + * keep a handy zero in R0 until first function call + */ + MOVW $setR12(SB), R12 + SUB $KZERO, R12 + ADD $PHYSDRAM, R12 + MOVW $0, R0 + + /* + * SVC mode, interrupts disabled + */ + BL svcmode(SB) + + /* + * disable the mmu and caches + */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BIC $(CpCdcache|CpCicache|CpCmmu), R1 + ORR $(CpCsbo|CpCsw), R1 + BIC $CpCsbz, R1 + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BARRIERS + + /* + * turn SMP on + * invalidate tlb + */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl + ORR $CpACsmp, R1 /* turn SMP on */ + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpAuxctl + BARRIERS + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv + BARRIERS + + /* + * find Mach for this cpu + */ + MRC CpSC, 0, R2, C(CpID), C(CpIDidct), CpIDmpid + AND $(MAXMACH-1), R2 /* mask out non-cpu-id bits */ + SLL $2, R2 /* convert to word index */ + MOVW $machaddr(SB), R0 + ADD R2, R0 /* R0 = &machaddr[cpuid] */ + MOVW (R0), R0 /* R0 = machaddr[cpuid] */ + CMP $0, R0 + BEQ 0(PC) /* must not be zero */ + SUB $KZERO, R0, R(MACH) /* m = PADDR(machaddr[cpuid]) */ + + /* + * start stack at top of local Mach + */ + ADD $(MACHSIZE-4), R(MACH), R13 + + /* + * set up domain access control and page table base + 
*/ + MOVW $Client, R1 + MCR CpSC, 0, R1, C(CpDAC), C(0) + MOVW 12(R(MACH)), R1 /* m->mmul1 */ + SUB $KZERO, R1 /* phys addr */ + ORR $(CpTTBs|CpTTBowba|CpTTBiwba), R1 + MCR CpSC, 0, R1, C(CpTTB), C(0) + MCR CpSC, 0, R1, C(CpTTB), C(0), CpTTB1 /* cortex has two */ + + /* + * invalidate my caches before enabling + */ + BL cachedinv(SB) + BL cacheiinv(SB) + BARRIERS + + /* + * enable caches, mmu, and high vectors + */ + MRC CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl + ORR $(CpChv|CpCdcache|CpCicache|CpCmmu), R0 + MCR CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl + BARRIERS + + /* + * switch MACH, SB, SP, and PC into KZERO space + */ + ADD $KZERO, R(MACH) + MOVW $setR12(SB), R12 + ADD $KZERO, R13 + MOVW $_startpg2(SB), R15 + +TEXT _startpg2(SB), 1, $-4 + + /* + * enable cycle counter + */ + MOVW $(1<<31), R1 + MCR CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenacyc + MOVW $1, R1 + MCR CpSC, 0, R1, C(CpCLD), C(CpCLDena), CpCLDenapmnc + + /* + * call cpustart and loop forever if it returns + */ + MRC CpSC, 0, R0, C(CpID), C(CpIDidct), CpIDmpid + AND $(MAXMACH-1), R0 /* mask out non-cpu-id bits */ + BL ,cpustart(SB) + B ,0(PC) + +/* + * get into SVC mode with interrupts disabled + * raspberry pi firmware since 29 Sept 2015 starts in HYP mode + */ +TEXT svcmode(SB), 1, $-4 + MOVW CPSR, R1 + AND $PsrMask, R1 + MOVW $PsrMhyp, R2 + CMP R2, R1 + MOVW $(PsrDirq|PsrDfiq|PsrMsvc), R1 + BNE nothyp + MSR(1, 1, 1, 0xe) /* MOVW R1, SPSR_HYP */ + MSR(0, 14, 1, 0xe) /* MOVW R14, ELR_HYP */ + ERET +nothyp: + MOVW R1, CPSR + RET + +TEXT cpidget(SB), 1, $-4 /* main ID */ + MRC CpSC, 0, R0, C(CpID), C(0), CpIDid + RET + +TEXT fsrget(SB), 1, $-4 /* data fault status */ + MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata + RET + +TEXT ifsrget(SB), 1, $-4 /* instruction fault status */ + MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst + RET + +TEXT farget(SB), 1, $-4 /* fault address */ + MRC CpSC, 0, R0, C(CpFAR), C(0x0) + RET + +TEXT cpctget(SB), 1, $-4 /* cache type */ + MRC CpSC, 0, R0, C(CpID), 
C(CpIDidct), CpIDct + RET + +TEXT lcycles(SB), 1, $-4 + MRC CpSC, 0, R0, C(CpCLD), C(CpCLDcyc), 0 + RET + +TEXT splhi(SB), 1, $-4 + MOVW R14, 4(R(MACH)) /* save caller pc in m->splpc */ + + MOVW CPSR, R0 /* turn off irqs (but not fiqs) */ + ORR $(PsrDirq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splfhi(SB), 1, $-4 + MOVW R14, 4(R(MACH)) /* save caller pc in m->splpc */ + + MOVW CPSR, R0 /* turn off irqs and fiqs */ + ORR $(PsrDirq|PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splflo(SB), 1, $-4 + MOVW CPSR, R0 /* turn on fiqs */ + BIC $(PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT spllo(SB), 1, $-4 + MOVW CPSR, R0 /* turn on irqs and fiqs */ + MOVW $0, R1 + CMP.S R1, R(MACH) + MOVW.NE R1, 4(R(MACH)) /* clear m->splpc */ + BIC $(PsrDirq|PsrDfiq), R0, R1 + MOVW R1, CPSR + RET + +TEXT splx(SB), 1, $-4 + MOVW R14, 4(R(MACH)) /* save caller pc in m->splpc */ + + MOVW R0, R1 /* reset interrupt level */ + MOVW CPSR, R0 + MOVW R1, CPSR + RET + +TEXT spldone(SB), 1, $0 /* end marker for devkprof.c */ + RET + +TEXT islo(SB), 1, $-4 + MOVW CPSR, R0 + AND $(PsrDirq), R0 + EOR $(PsrDirq), R0 + RET + +TEXT cas(SB), $0 +TEXT cmpswap(SB), $0 + MOVW ov+4(FP), R1 + MOVW nv+8(FP), R2 +spincas: + LDREX (R0), R3 + CMP.S R3, R1 + BNE fail + STREX R2, (R0), R4 + CMP.S $0, R4 + BNE spincas + MOVW $1, R0 + DMB + RET +fail: + CLREX + MOVW $0, R0 + RET + +TEXT tas(SB), $-4 +TEXT _tas(SB), $-4 /* _tas(ulong *) */ + /* returns old (R0) after modifying (R0) */ + MOVW R0,R5 + DMB + + MOVW $1,R2 /* new value of (R0) */ + MOVW $MAXSC, R8 +tas1: + LDREX (R5), R7 + CMP.S $0, R7 /* old value non-zero (lock taken)? */ + BNE lockbusy /* we lose */ + SUB.S $1, R8 + BEQ lockloop2 + STREX R2,(R5),R4 + CMP.S $0, R4 + BNE tas1 /* strex failed? 
try again */ + DMB + B tas0 +lockloop2: + BL abort(SB) +lockbusy: + CLREX +tas0: + MOVW R7, R0 /* return old value */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVW R13, 0(R0) /* sp */ + MOVW R14, 4(R0) /* pc */ + MOVW $0, R0 + RET + +TEXT gotolabel(SB), 1, $-4 + MOVW 0(R0), R13 /* sp */ + MOVW 4(R0), R14 /* pc */ + MOVW $1, R0 + RET + +TEXT getcallerpc(SB), 1, $-4 + MOVW 0(R13), R0 + RET + +TEXT idlehands(SB), $-4 + MOVW CPSR, R3 + ORR $(PsrDirq|PsrDfiq), R3, R1 /* splfhi */ + MOVW R1, CPSR + + DSB + MOVW nrdy(SB), R0 + CMP $0, R0 + WFI_EQ + DSB + + MOVW R3, CPSR /* splx */ + RET + + +TEXT coherence(SB), $-4 + BARRIERS + RET + +TEXT sev(SB), $-4 + SEV + RET + +/* + * invalidate tlb + */ +TEXT mmuinvalidate(SB), 1, $-4 + DSB + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv + BARRIERS + RET + +/* + * mmuinvalidateaddr(va) + * invalidate tlb entry for virtual page address va, ASID 0 + */ +TEXT mmuinvalidateaddr(SB), 1, $-4 + DSB + MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse + BARRIERS + RET + +/* + * `single-element' cache operations. + * in arm arch v7, if effective to PoC, they operate on all cache levels, so separate + * l2 functions are unnecessary. 
+ */ + +TEXT cachedwbse(SB), $-4 /* D writeback SE */ + MOVW R0, R2 + + MOVW CPSR, R3 + CPSID /* splhi */ + + BARRIERS /* force outstanding stores to cache */ + MOVW R2, R0 + MOVW 4(FP), R1 + ADD R0, R1 /* R1 is end address */ + BIC $(CACHELINESZ-1), R0 /* cache line start */ +_dwbse: + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEse + /* can't have a BARRIER here since it zeroes R0 */ + ADD $CACHELINESZ, R0 + CMP.S R0, R1 + BGT _dwbse + B _wait + +/* + * TLB on armv7 loads from cache, so no need for writeback + */ +TEXT cachedwbtlb(SB), $-4 + DSB + ISB + RET + +TEXT cachedwbinvse(SB), $-4 /* D writeback+invalidate SE */ + MOVW R0, R2 + + MOVW CPSR, R3 + CPSID /* splhi */ + + BARRIERS /* force outstanding stores to cache */ + MOVW R2, R0 + MOVW 4(FP), R1 + ADD R0, R1 /* R1 is end address */ + BIC $(CACHELINESZ-1), R0 /* cache line start */ +_dwbinvse: + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEse + /* can't have a BARRIER here since it zeroes R0 */ + ADD $CACHELINESZ, R0 + CMP.S R0, R1 + BGT _dwbinvse +_wait: /* drain write buffer */ + BARRIERS + + MOVW R3, CPSR /* splx */ + RET + +TEXT cachedinvse(SB), $-4 /* D invalidate SE */ + MOVW R0, R2 + + MOVW CPSR, R3 + CPSID /* splhi */ + + BARRIERS /* force outstanding stores to cache */ + MOVW R2, R0 + MOVW 4(FP), R1 + ADD R0, R1 /* R1 is end address */ + BIC $(CACHELINESZ-1), R0 /* cache line start */ +_dinvse: + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEse + /* can't have a BARRIER here since it zeroes R0 */ + ADD $CACHELINESZ, R0 + CMP.S R0, R1 + BGT _dinvse + B _wait + +#include "cache.v7.s" diff --git a/sys/src/9/bcm/cache.v7.s b/sys/src/9/bcm/cache.v7.s new file mode 100644 index 000000000..ee5d57905 --- /dev/null +++ b/sys/src/9/bcm/cache.v7.s @@ -0,0 +1,220 @@ +/* + * cortex arm arch v7 cache flushing and invalidation + * shared by l.s and rebootcode.s + */ + +#define BPIALL MCR CpSC, 0, R0, C(CpCACHE), C(5), 6 /* branch predictor invalidate all */ + +TEXT cacheiinv(SB), $-4 /* I 
invalidate */ + DSB + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* ok on cortex */ + BPIALL /* redundant? */ + DSB + ISB + RET + +TEXT cacheiinvse(SB), $0 /* I invalidate SE */ + MOVW 4(FP), R1 + ADD R0, R1 + BIC $(ICACHELINESZ - 1), R0 + DSB +_iinvse: + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEse + ADD $ICACHELINESZ, R0 + CMP.S R0, R1 + BGT _iinvse + BPIALL + DSB + ISB + RET + +/* + * set/way operators, passed a suitable set/way value in R0. + */ +TEXT cachedwb_sw(SB), $-4 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEsi + RET + +TEXT cachedwbinv_sw(SB), $-4 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEsi + RET + +TEXT cachedinv_sw(SB), $-4 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvd), CpCACHEsi + RET + + /* set cache size select */ +TEXT setcachelvl(SB), $-4 + MCR CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 + ISB + RET + + /* return cache sizes */ +TEXT getwayssets(SB), $-4 + MRC CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 + RET + +/* + * l1 cache operations. + * l1 and l2 ops are intended to be called from C, thus need save no + * caller's regs, only those we need to preserve across calls. 
+ */ + +TEXT cachedwb(SB), $-4 + MOVW.W R14, -8(R13) + MOVW $cachedwb_sw(SB), R0 + MOVW $1, R8 + BL wholecache(SB) + MOVW.P 8(R13), R15 + +TEXT cachedwbinv(SB), $-4 + MOVW.W R14, -8(R13) + MOVW $cachedwbinv_sw(SB), R0 + MOVW $1, R8 + BL wholecache(SB) + MOVW.P 8(R13), R15 + +TEXT cachedinv(SB), $-4 + MOVW.W R14, -8(R13) + MOVW $cachedinv_sw(SB), R0 + MOVW $1, R8 + BL wholecache(SB) + MOVW.P 8(R13), R15 + +TEXT cacheuwbinv(SB), $-4 + MOVM.DB.W [R14], (R13) /* save lr on stack */ + MOVW CPSR, R1 + CPSID /* splhi */ + + MOVM.DB.W [R1], (R13) /* save R1 on stack */ + + BL cachedwbinv(SB) + BL cacheiinv(SB) + + MOVM.IA.W (R13), [R1] /* restore R1 (saved CPSR) */ + MOVW R1, CPSR + MOVM.IA.W (R13), [R14] /* restore lr */ + RET + +/* + * l2 cache operations + */ + +TEXT l2cacheuwb(SB), $-4 + MOVW.W R14, -8(R13) + MOVW $cachedwb_sw(SB), R0 + MOVW $2, R8 + BL wholecache(SB) + MOVW.P 8(R13), R15 + +TEXT l2cacheuwbinv(SB), $-4 + MOVW.W R14, -8(R13) + MOVW CPSR, R1 + CPSID /* splhi */ + + MOVM.DB.W [R1], (R13) /* save R1 on stack */ + + MOVW $cachedwbinv_sw(SB), R0 + MOVW $2, R8 + BL wholecache(SB) + BL l2cacheuinv(SB) + + MOVM.IA.W (R13), [R1] /* restore R1 (saved CPSR) */ + MOVW R1, CPSR + MOVW.P 8(R13), R15 + +TEXT l2cacheuinv(SB), $-4 + MOVW.W R14, -8(R13) + MOVW $cachedinv_sw(SB), R0 + MOVW $2, R8 + BL wholecache(SB) + MOVW.P 8(R13), R15 + +/* + * these shift values are for the Cortex-A8 L1 cache (A=2, L=6) and + * the Cortex-A8 L2 cache (A=3, L=6). + * A = log2(# of ways), L = log2(bytes per cache line). + * see armv7 arch ref p. 1403. + */ +#define L1WAYSH 30 +#define L1SETSH 6 +#define L2WAYSH 29 +#define L2SETSH 6 + +/* + * callers are assumed to be the above l1 and l2 ops. + * R0 is the function to call in the innermost loop. + * R8 is the cache level (one-origin: 1 or 2). + * + * initial translation by 5c, then massaged by hand. 
+ */ +TEXT wholecache+0(SB), $-4 + MOVW R0, R1 /* save argument for inner loop in R1 */ + SUB $1, R8 /* convert cache level to zero origin */ + + /* we may not have the MMU on yet, so map R1 to PC's space */ + BIC $KSEGM, R1 /* strip segment from address */ + MOVW PC, R2 /* get PC's segment ... */ + AND $KSEGM, R2 + ORR R2, R1 /* combine them */ + + /* drain write buffers */ + BARRIERS + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait + ISB + + MOVW CPSR, R2 + MOVM.DB.W [R2,R14], (SP) /* save regs on stack */ + CPSID /* splhi to make entire op atomic */ + + /* get cache sizes */ + SLL $1, R8, R0 /* R0 = (cache - 1) << 1 */ + MCR CpSC, CpIDcssel, R0, C(CpID), C(CpIDidct), 0 /* set cache size select */ + ISB + MRC CpSC, CpIDcsize, R0, C(CpID), C(CpIDidct), 0 /* get cache sizes */ + + /* compute # of ways and sets for this cache level */ + SRA $3, R0, R5 /* R5 (ways) = R0 >> 3 */ + AND $1023, R5 /* R5 = (R0 >> 3) & MASK(10) */ + ADD $1, R5 /* R5 (ways) = ((R0 >> 3) & MASK(10)) + 1 */ + + SRA $13, R0, R2 /* R2 = R0 >> 13 */ + AND $32767, R2 /* R2 = (R0 >> 13) & MASK(15) */ + ADD $1, R2 /* R2 (sets) = ((R0 >> 13) & MASK(15)) + 1 */ + + /* precompute set/way shifts for inner loop */ + CMP $0, R8 /* cache == 1? */ + MOVW.EQ $L1WAYSH, R3 /* yes */ + MOVW.EQ $L1SETSH, R4 + MOVW.NE $L2WAYSH, R3 /* no */ + MOVW.NE $L2SETSH, R4 + + /* iterate over ways */ + MOVW $0, R7 /* R7: way */ +outer: + /* iterate over sets */ + MOVW $0, R6 /* R6: set */ +inner: + /* compute set/way register contents */ + SLL R3, R7, R0 /* R0 = way << R3 (L?WAYSH) */ + ORR R8<<1, R0 /* R0 = way << L?WAYSH | (cache - 1) << 1 */ + ORR R6<<R4, R0 /* R0 = way<<L?WAYSH | (cache-1)<<1 |set<<R4 */ + + BL (R1) /* call set/way operation with R0 */ + + ADD $1, R6 /* set++ */ + CMP R2, R6 /* set >= sets? */ + BLT inner /* no, do next set */ + + ADD $1, R7 /* way++ */ + CMP R5, R7 /* way >= ways? 
*/ + BLT outer /* no, do next way */ + + MOVM.IA.W (SP), [R2,R14] /* restore regs */ + MOVW R2, CPSR /* splx */ + + /* drain write buffers */ + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwb), CpCACHEwait + ISB + RET diff --git a/sys/src/9/bcm/clock.c b/sys/src/9/bcm/clock.c index 42d9a7b04..f67209bdf 100644 --- a/sys/src/9/bcm/clock.c +++ b/sys/src/9/bcm/clock.c @@ -1,11 +1,13 @@ /* - * bcm2835 timers + * bcm283[56] timers * System timers run at 1MHz (timers 1 and 2 are used by GPU) * ARM timer usually runs at 250MHz (may be slower in low power modes) * Cycle counter runs at 700MHz (unless overclocked) * All are free-running up-counters + * Cortex-a7 has local generic timers per cpu (which we run at 1MHz) * * Use system timer 3 (64 bits) for hzclock interrupts and fastticks + * For smp on bcm2836, use local generic timer for interrupts on cpu1-3 * Use ARM timer (32 bits) for perfticks * Use ARM timer to force immediate interrupt * Use cycle counter for cycles() @@ -17,14 +19,21 @@ #include "dat.h" #include "fns.h" #include "io.h" +#include "ureg.h" +#include "arm.h" enum { SYSTIMERS = VIRTIO+0x3000, ARMTIMER = VIRTIO+0xB400, + Localctl = 0x00, + Prescaler = 0x08, + Localintpending = 0x60, + SystimerFreq = 1*Mhz, MaxPeriod = SystimerFreq / HZ, - MinPeriod = SystimerFreq / (100*HZ), + MinPeriod = 10, + }; typedef struct Systimers Systimers; @@ -64,6 +73,11 @@ enum { TmrPrescale256 = 0x02<<2, CntWidth16 = 0<<1, CntWidth32 = 1<<1, + + /* generic timer (cortex-a7) */ + Enable = 1<<0, + Imask = 1<<1, + Istatus = 1<<2, }; static void @@ -71,12 +85,23 @@ clockintr(Ureg *ureg, void *) { Systimers *tn; + if(m->machno != 0) + panic("cpu%d: unexpected system timer interrupt", m->machno); tn = (Systimers*)SYSTIMERS; /* dismiss interrupt */ tn->cs = 1<<3; timerintr(ureg, 0); } +static void +localclockintr(Ureg *ureg, void *) +{ + if(m->machno == 0) + panic("cpu0: Unexpected local generic timer interrupt"); + cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Imask|Enable); + 
timerintr(ureg, 0); +} + void clockshutdown(void) { @@ -84,6 +109,10 @@ clockshutdown(void) tm = (Armtimer*)ARMTIMER; tm->ctl = 0; + if(cpuserver) + wdogfeed(); + else + wdogoff(); } void @@ -93,12 +122,16 @@ clockinit(void) Armtimer *tm; u32int t0, t1, tstart, tend; - tn = (Systimers*)SYSTIMERS; - tm = (Armtimer*)ARMTIMER; - tm->load = 0; - tm->ctl = TmrPrescale1|CntEnable|CntWidth32; - coherence(); + if(((cprdsc(0, CpID, CpIDfeat, 1) >> 16) & 0xF) != 0) { + /* generic timer supported */ + if(m->machno == 0){ + *(ulong*)(ARMLOCAL + Localctl) = 0; /* input clock is 19.2Mhz crystal */ + *(ulong*)(ARMLOCAL + Prescaler) = 0x06aaaaab; /* divide by (2^31/Prescaler) for 1Mhz */ + } + cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Imask); + } + tn = (Systimers*)SYSTIMERS; tstart = tn->clo; do{ t0 = lcycles(); @@ -111,25 +144,36 @@ clockinit(void) m->cpuhz = 100 * t1; m->cpumhz = (m->cpuhz + Mhz/2 - 1) / Mhz; m->cyclefreq = m->cpuhz; - - tn->c3 = tn->clo - 1; - intrenable(IRQtimer3, clockintr, nil, 0, "clock"); + if(m->machno == 0){ + tn->c3 = tn->clo - 1; + tm = (Armtimer*)ARMTIMER; + tm->load = 0; + tm->ctl = TmrPrescale1|CntEnable|CntWidth32; + intrenable(IRQtimer3, clockintr, nil, 0, "clock"); + }else + intrenable(IRQcntpns, localclockintr, nil, 0, "clock"); } void timerset(uvlong next) { Systimers *tn; - vlong now, period; + uvlong now; + long period; - tn = (Systimers*)SYSTIMERS; now = fastticks(nil); - period = next - fastticks(nil); + period = next - now; if(period < MinPeriod) - next = now + MinPeriod; + period = MinPeriod; else if(period > MaxPeriod) - next = now + MaxPeriod; - tn->c3 = (ulong)next; + period = MaxPeriod; + if(m->machno > 0){ + cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysval, period); + cpwrsc(0, CpTIMER, CpTIMERphys, CpTIMERphysctl, Enable); + }else{ + tn = (Systimers*)SYSTIMERS; + tn->c3 = tn->clo + period; + } } uvlong @@ -137,16 +181,17 @@ fastticks(uvlong *hz) { Systimers *tn; ulong lo, hi; + uvlong now; - tn = (Systimers*)SYSTIMERS; if(hz) 
*hz = SystimerFreq; + tn = (Systimers*)SYSTIMERS; do{ hi = tn->chi; lo = tn->clo; }while(tn->chi != hi); - m->fastclock = (uvlong)hi<<32 | lo; - return m->fastclock; + now = (uvlong)hi<<32 | lo; + return now; } ulong @@ -172,7 +217,6 @@ armtimerset(int n) tm->ctl &= ~(TmrEnable|TmrIntEnable); tm->irq = 1; } - coherence(); } ulong @@ -180,7 +224,7 @@ ulong { if(SystimerFreq != 1*Mhz) return fastticks2us(fastticks(nil)); - return fastticks(nil); + return ((Systimers*)SYSTIMERS)->clo; } void @@ -189,8 +233,8 @@ microdelay(int n) Systimers *tn; u32int now, diff; - tn = (Systimers*)SYSTIMERS; diff = n + 1; + tn = (Systimers*)SYSTIMERS; now = tn->clo; while(tn->clo - now < diff) ; diff --git a/sys/src/9/bcm/dat.h b/sys/src/9/bcm/dat.h index 20cbf3880..cf674cf3e 100644 --- a/sys/src/9/bcm/dat.h +++ b/sys/src/9/bcm/dat.h @@ -27,6 +27,7 @@ typedef struct PhysUart PhysUart; typedef struct PMMU PMMU; typedef struct Proc Proc; typedef u32int PTE; +typedef struct Soc Soc; typedef struct Uart Uart; typedef struct Ureg Ureg; typedef uvlong Tval; @@ -214,7 +215,7 @@ struct Mach typedef void KMap; #define VA(k) ((uintptr)(k)) #define kmap(p) (KMap*)((p)->pa|kseg0) -#define kunmap(k) +extern void kunmap(KMap*); struct { @@ -279,3 +280,29 @@ struct DevConf Devport *ports; /* The ports themselves */ }; +struct Soc { /* SoC dependent configuration */ + ulong dramsize; + uintptr physio; + uintptr busdram; + uintptr busio; + uintptr armlocal; + u32int l1ptedramattrs; + u32int l2ptedramattrs; +}; +extern Soc soc; + +#define BUSUNKNOWN -1 + +/* + * GPIO + */ +enum { + Input = 0x0, + Output = 0x1, + Alt0 = 0x4, + Alt1 = 0x5, + Alt2 = 0x6, + Alt3 = 0x7, + Alt4 = 0x3, + Alt5 = 0x2, +}; diff --git a/sys/src/9/bcm/devarch.c b/sys/src/9/bcm/devarch.c index a9b51c273..090c6ae77 100644 --- a/sys/src/9/bcm/devarch.c +++ b/sys/src/9/bcm/devarch.c @@ -150,17 +150,19 @@ Dev archdevtab = { static long cputyperead(Chan*, void *a, long n, vlong offset) { - char str[128]; + char name[64], str[128]; - 
snprint(str, sizeof str, "ARM11 %d\n", m->cpumhz); + cputype2name(name, sizeof name); + snprint(str, sizeof str, "ARM %s %d\n", name, m->cpumhz); return readstr(offset, a, n, str); } static long cputempread(Chan*, void *a, long n, vlong offset) { - char str[128]; - snprint(str, sizeof str, "%d±%d\n", gettemp(0) / 1000, 1); + char str[16]; + + snprint(str, sizeof str, "%ud\n", (getcputemp()+500)/1000); return readstr(offset, a, n, str); } diff --git a/sys/src/9/bcm/devgpio.c b/sys/src/9/bcm/devgpio.c index 76b893a02..8df38e865 100644 --- a/sys/src/9/bcm/devgpio.c +++ b/sys/src/9/bcm/devgpio.c @@ -232,109 +232,6 @@ getpintable(void) } } -// stolen from uartmini.c -#define GPIOREGS (VIRTIO+0x200000) -/* GPIO regs */ -enum { - Fsel0 = 0x00>>2, - FuncMask= 0x7, - Set0 = 0x1c>>2, - Clr0 = 0x28>>2, - Lev0 = 0x34>>2, - Evds0 = 0x40>>2, - Redge0 = 0x4C>>2, - Fedge0 = 0x58>>2, - Hpin0 = 0x64>>2, - Lpin0 = 0x70>>2, - ARedge0 = 0x7C>>2, - AFedge0 = 0x88>2, - PUD = 0x94>>2, - PUDclk0 = 0x98>>2, - PUDclk1 = 0x9c>>2, -}; - -static void -gpiofuncset(uint pin, int func) -{ - u32int *gp, *fsel; - int off; - - gp = (u32int*)GPIOREGS; - fsel = &gp[Fsel0 + pin/10]; - off = (pin % 10) * 3; - *fsel = (*fsel & ~(FuncMask<<off)) | func<<off; -} - -static int -gpiofuncget(uint pin) -{ - u32int *gp, *fsel; - int off; - - gp = (u32int*)GPIOREGS; - fsel = &gp[Fsel0 + pin/10]; - off = (pin % 10) * 3; - return ((*fsel >> off) & FuncMask); -} - -static void -gpiopullset(uint pin, int state) -{ - u32int *gp, *reg; - u32int mask; - - gp = (u32int*)GPIOREGS; - reg = &gp[PUDclk0 + pin/32]; - mask = 1 << (pin % 32); - gp[PUD] = state; - microdelay(1); - *reg = mask; - microdelay(1); - *reg = 0; -} - -static void -gpioout(uint pin, int set) -{ - u32int *gp; - int v; - - gp = (u32int*)GPIOREGS; - v = set? 
Set0 : Clr0; - gp[v + pin/32] = 1 << (pin % 32); -} - -static int -gpioin(uint pin) -{ - u32int *gp; - - gp = (u32int*)GPIOREGS; - return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0; -} - -static void -gpioevent(uint pin, int event, int enable) -{ - u32int *gp, *field; - int reg = 0; - - switch(event) - { - case Erising: - reg = Redge0; - break; - case Efalling: - reg = Fedge0; - break; - default: - panic("gpio: unknown event type"); - } - gp = (u32int*)GPIOREGS; - field = &gp[reg + pin/32]; - SET_BIT(field, pin, enable); -} - static void mkdeventry(Chan *c, Qid qid, Dirtab *tab, Dir *db) { @@ -417,27 +314,16 @@ static void interrupt(Ureg*, void *) { - u32int *gp, *field; - char pin; + uint pin; - gp = (u32int*)GPIOREGS; - - int set; - coherence(); eventvalue = 0; for(pin = 0; pin < PIN_TABLE_SIZE; pin++) { - set = (gp[Evds0 + pin/32] & (1 << (pin % 32))) != 0; - - if(set) - { - field = &gp[Evds0 + pin/32]; - SET_BIT(field, pin, 1); + if(gpiogetevent(pin)) SET_BIT(&eventvalue, pin, 1); - } } coherence(); @@ -447,7 +333,8 @@ interrupt(Ureg*, void *) static void gpioinit(void) { - boardrev = getrevision() & 0xff; + gpiomeminit(); + boardrev = getboardrev() & 0xff; pinscheme = Qboard; intrenable(49, interrupt, nil, 0, "gpio1"); } @@ -676,7 +563,7 @@ gpiowrite(Chan *c, void *va, long n, vlong) { if(strncmp(funcname[i], arg, strlen(funcname[i])) == 0) { - gpiofuncset(pin, i); + gpiosel(pin, i); break; } } @@ -691,7 +578,7 @@ gpiowrite(Chan *c, void *va, long n, vlong) { if(strncmp(pudname[i], arg, strlen(pudname[i])) == 0) { - gpiopullset(pin, i); + gpiopull(pin, i); break; } } @@ -707,7 +594,7 @@ gpiowrite(Chan *c, void *va, long n, vlong) { if(strncmp(evtypename[i], arg, strlen(evtypename[i])) == 0) { - gpioevent(pin, i, (cb->f[2][0] == 'e')); + gpioselevent(pin, i, (cb->f[2][0] == 'e')); break; } } diff --git a/sys/src/9/bcm/dma.c b/sys/src/9/bcm/dma.c index 0a071ca00..8c3b0bd0a 100644 --- a/sys/src/9/bcm/dma.c +++ b/sys/src/9/bcm/dma.c @@ -25,7 +25,7 @@ enum { 
Nchan = 7, /* number of dma channels */ Regsize = 0x100, /* size of regs for each chan */ - Cbalign = 32, /* control block byte alignment */ + Cbalign = 64, /* control block byte alignment (allow for 64-byte cache on bcm2836) */ Dbg = 0, /* registers for each dma controller */ @@ -97,6 +97,18 @@ struct Cb { static Ctlr dma[Nchan]; static u32int *dmaregs = (u32int*)DMAREGS; +uintptr +dmaaddr(void *va) +{ + return soc.busdram | (PTR2UINT(va) & ~KSEGM); +} + +static uintptr +dmaioaddr(void *va) +{ + return soc.busio | (PTR2UINT(va) & ~VIRTIO); +} + static void dump(char *msg, uchar *p, int n) { @@ -146,7 +158,7 @@ dmastart(int chan, int dev, int dir, void *src, void *dst, int len) ctlr->regs = (u32int*)(DMAREGS + chan*Regsize); ctlr->cb = xspanalloc(sizeof(Cb), Cbalign, 0); assert(ctlr->cb != nil); - dmaregs[Enable] |= 1 << chan; + dmaregs[Enable] |= 1<<chan; ctlr->regs[Cs] = Reset; while(ctlr->regs[Cs] & Reset) ; @@ -156,33 +168,33 @@ dmastart(int chan, int dev, int dir, void *src, void *dst, int len) ti = 0; switch(dir){ case DmaD2M: - cachedwbinvse(dst, len); + cachedinvse(dst, len); ti = Srcdreq | Destinc; - cb->sourcead = DMAIO(src); - cb->destad = DMAADDR(dst); + cb->sourcead = dmaioaddr(src); + cb->destad = dmaaddr(dst); break; case DmaM2D: cachedwbse(src, len); ti = Destdreq | Srcinc; - cb->sourcead = DMAADDR(src); - cb->destad = DMAIO(dst); + cb->sourcead = dmaaddr(src); + cb->destad = dmaioaddr(dst); break; case DmaM2M: cachedwbse(src, len); - cachedwbinvse(dst, len); + cachedinvse(dst, len); ti = Srcinc | Destinc; - cb->sourcead = DMAADDR(src); - cb->destad = DMAADDR(dst); + cb->sourcead = dmaaddr(src); + cb->destad = dmaaddr(dst); break; } - cb->ti = ti | dev << Permapshift | Inten; + cb->ti = ti | dev<<Permapshift | Inten; cb->txfrlen = len; cb->stride = 0; cb->nextconbk = 0; cachedwbse(cb, sizeof(Cb)); ctlr->regs[Cs] = 0; microdelay(1); - ctlr->regs[Conblkad] = DMAADDR(cb); + ctlr->regs[Conblkad] = dmaaddr(cb); DBG print("dma start: %ux %ux %ux %ux %ux 
%ux\n", cb->ti, cb->sourcead, cb->destad, cb->txfrlen, cb->stride, cb->nextconbk); diff --git a/sys/src/9/bcm/dwcotg.h b/sys/src/9/bcm/dwcotg.h index 545cb80ed..28f5a61a5 100644 --- a/sys/src/9/bcm/dwcotg.h +++ b/sys/src/9/bcm/dwcotg.h @@ -434,8 +434,8 @@ enum { Episo = 1<<18, Epbulk = 2<<18, Epintr = 3<<18, - Multicnt = 0x3<<20, /* transactions per μframe or retries */ - /* per periodic split */ + Multicnt = 0x3<<20, /* transactions per μframe */ + /* or retries per periodic split */ OMulticnt = 20, Devaddr = 0x7f<<22, /* device address */ ODevaddr = 22, diff --git a/sys/src/9/bcm/fns.h b/sys/src/9/bcm/fns.h index a9944a383..eb9c09eb5 100644 --- a/sys/src/9/bcm/fns.h +++ b/sys/src/9/bcm/fns.h @@ -10,6 +10,9 @@ extern void cachedwbse(void*, int); extern void cachedwbinvse(void*, int); extern void cacheiinv(void); extern void cacheuwbinv(void); +extern void cachedwbtlb(void*, int); +extern void cacheiinvse(void*, int); +extern void cachedinvse(void*, int); extern uintptr cankaddr(uintptr pa); extern int cas32(void*, u32int, u32int); extern void checkmmu(uintptr, uintptr); @@ -20,11 +23,13 @@ extern void coherence(void); extern ulong cprd(int cp, int op1, int crn, int crm, int op2); extern ulong cprdsc(int op1, int crn, int crm, int op2); extern void cpuidprint(void); +extern u32int cpidget(void); extern void cpwr(int cp, int op1, int crn, int crm, int op2, ulong val); extern void cpwrsc(int op1, int crn, int crm, int op2, ulong val); #define cycles(ip) *(ip) = lcycles() extern void dmastart(int, int, int, void*, void*, int); extern int dmawait(int); +extern uintptr dmaaddr(void *va); extern int fbblank(int); extern void* fbinit(int, int*, int*, int*); extern u32int farget(void); @@ -41,13 +46,26 @@ extern char *getethermac(void); extern uint getfirmware(void); extern int getpower(int); extern void getramsize(Confmem*); +extern int getncpus(void); +extern void gpiosel(uint, int); +extern void gpiopull(uint, int); +extern void gpiopullup(uint); +extern void 
gpiopulloff(uint); +extern void gpiopulldown(uint); +extern void gpioout(uint, int); +extern int gpioin(uint); +extern void gpioselevent(uint, int, int); +extern int gpiogetevent(uint); +extern void gpiomeminit(void); extern u32int ifsrget(void); +extern void intrcpushutdown(void); extern void irqenable(int, void (*)(Ureg*, void*), void*); #define intrenable(i, f, a, b, n) irqenable((i), (f), (a)) extern void intrsoff(void); extern int isaconfig(char*, int, ISAConf*); +extern void l2cacheuwbinv(void); extern void links(void); -extern void mmuinit(void); +extern void mmuinit(void*); extern void mmuinit1(void); extern void mmuinvalidate(void); extern void mmuinvalidateaddr(u32int); @@ -58,8 +76,10 @@ extern void procsave(Proc*); extern void procfork(Proc*); extern void procsetup(Proc*); extern void screeninit(void); +extern void setclkrate(int, ulong); extern void setpower(int, int); extern void setr13(int, u32int*); +extern int startcpus(uint); extern int splfhi(void); extern int splflo(void); extern int tas(void *); @@ -68,9 +88,17 @@ extern void trapinit(void); extern void uartconsinit(void); extern int userureg(Ureg*); extern void vectors(void); +extern void vgpinit(void); +extern void vgpset(uint, int); extern void vtable(void); -extern uint gettemp(int); -extern uint getrevision(void); +extern void wdogoff(void); +extern void wdogfeed(void); +extern void vtable(void); +extern int l2ap(int); +extern uint getcputemp(void); +extern char *cputype2name(char *buf, int size); +extern uint getboardrev(void); +extern void sev(void); /* * floating point emulation diff --git a/sys/src/9/bcm/gpio.c b/sys/src/9/bcm/gpio.c new file mode 100644 index 000000000..b970e96f6 --- /dev/null +++ b/sys/src/9/bcm/gpio.c @@ -0,0 +1,142 @@ +/* + * Raspberry Pi GPIO support + */ + +#include "u.h" +#include "../port/lib.h" +#include "../port/error.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" + +#define GPIOREGS (VIRTIO+0x200000) + +/* GPIO regs */ +enum { + 
Fsel0 = 0x00>>2, + FuncMask= 0x7, + Set0 = 0x1c>>2, + Clr0 = 0x28>>2, + Lev0 = 0x34>>2, + Evds0 = 0x40>>2, + Redge0 = 0x4C>>2, + Fedge0 = 0x58>>2, + Hpin0 = 0x64>>2, + Lpin0 = 0x70>>2, + ARedge0 = 0x7C>>2, + AFedge0 = 0x88>>2, + PUD = 0x94>>2, + Off = 0x0, + Pulldown= 0x1, + Pullup = 0x2, + PUDclk0 = 0x98>>2, + PUDclk1 = 0x9c>>2, +}; + +void +gpiosel(uint pin, int func) +{ + u32int *gp, *fsel; + int off; + + gp = (u32int*)GPIOREGS; + fsel = &gp[Fsel0 + pin/10]; + off = (pin % 10) * 3; + *fsel = (*fsel & ~(FuncMask<<off)) | func<<off; +} + +void +gpiopull(uint pin, int func) +{ + u32int *gp, *reg; + u32int mask; + + gp = (u32int*)GPIOREGS; + reg = &gp[PUDclk0 + pin/32]; + mask = 1 << (pin % 32); + gp[PUD] = func; + microdelay(1); + *reg = mask; + microdelay(1); + *reg = 0; +} + +void +gpiopulloff(uint pin) +{ + gpiopull(pin, Off); +} + +void +gpiopullup(uint pin) +{ + gpiopull(pin, Pullup); +} + +void +gpiopulldown(uint pin) +{ + gpiopull(pin, Pulldown); +} + +void +gpioout(uint pin, int set) +{ + u32int *gp; + int v; + + gp = (u32int*)GPIOREGS; + v = set? 
Set0 : Clr0; + gp[v + pin/32] = 1 << (pin % 32); +} + +int +gpioin(uint pin) +{ + u32int *gp; + + gp = (u32int*)GPIOREGS; + return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0; +} + +void +gpioselevent(uint pin, int falling, int enable) +{ + u32int *gp, *field; + int reg; + + enable = enable != 0; + if(falling) + reg = Fedge0; + else + reg = Redge0; + gp = (u32int*)GPIOREGS; + field = &gp[reg + pin/32]; + *field = (*field & ~(1 << (pin % 32))) | (enable << (pin % 32)); /* clear the pin's bit, then set it iff enabled */ +} + +int +gpiogetevent(uint pin) +{ + u32int *gp, *reg, val; + + gp = (u32int*)GPIOREGS; + reg = &gp[Evds0 + pin/32]; + val = *reg & (1 << (pin % 32)); + *reg = val; /* write-1-to-clear: acknowledge only this pin's event */ + return val != 0; +} + +void +gpiomeminit(void) +{ + Physseg seg; + + memset(&seg, 0, sizeof seg); + seg.attr = SG_PHYSICAL; + seg.name = "gpio"; + seg.pa = GPIOREGS; + seg.size = BY2PG; + addphysseg(&seg); +} diff --git a/sys/src/9/bcm/io.h b/sys/src/9/bcm/io.h index 0bf103670..a077ef69d 100644 --- a/sys/src/9/bcm/io.h +++ b/sys/src/9/bcm/io.h @@ -8,11 +8,23 @@ enum { IRQdma0 = 16, #define IRQDMA(chan) (IRQdma0+(chan)) IRQaux = 29, + IRQi2c = 53, + IRQspi = 54, + IRQsdhost = 56, IRQmmc = 62, IRQbasic = 64, IRQtimerArm = IRQbasic + 0, + IRQlocal = 96, + IRQcntps = IRQlocal + 0, + IRQcntpns = IRQlocal + 1, + IRQmbox0 = IRQlocal + 4, + IRQmbox1 = IRQlocal + 5, + IRQmbox2 = IRQlocal + 6, + IRQmbox3 = IRQlocal + 7, + IRQlocaltmr = IRQlocal + 11, + IRQfiq = IRQusb, /* only one source can be FIQ */ DmaD2M = 0, /* device to memory */ @@ -20,7 +32,14 @@ enum { DmaM2M = 2, /* memory to memory */ DmaChanEmmc = 4, /* can only use 2-5, maybe 0 */ + DmaChanSdhost = 5, + DmaChanSpiTx= 2, + DmaChanSpiRx= 0, + + DmaDevSpiTx = 6, + DmaDevSpiRx = 7, DmaDevEmmc = 11, + DmaDevSdhost = 13, PowerSd = 0, PowerUart0, @@ -43,4 +62,3 @@ enum { ClkPixel, ClkPwm, }; -#define BUSUNKNOWN (-1) diff --git a/sys/src/9/bcm/l.s b/sys/src/9/bcm/l.s index 6c716350f..005f03a44 100644 --- a/sys/src/9/bcm/l.s +++ b/sys/src/9/bcm/l.s @@ -1,10 +1,14 @@ /* - * Broadcom bcm2835 SoC, as used 
in Raspberry Pi - * arm1176jzf-s processor (armv6) + * Common startup for armv6 and armv7 + * The rest of l.s has been moved to armv[67].s */ #include "arm.s" +/* + * on bcm2836, only cpu0 starts here + * other cpus enter at cpureset in armv7.s + */ TEXT _start(SB), 1, $-4 /* * load physical base for SB addressing while mmu is off @@ -16,259 +20,14 @@ TEXT _start(SB), 1, $-4 MOVW $0, R0 /* - * SVC mode, interrupts disabled - */ - MOVW $(PsrDirq|PsrDfiq|PsrMsvc), R1 - MOVW R1, CPSR - - /* - * disable the mmu and L1 caches - * invalidate caches and tlb - */ - MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl - BIC $(CpCdcache|CpCicache|CpCpredict|CpCmmu), R1 - MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvu), CpCACHEall - MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv - ISB - - /* - * clear mach and page tables - */ - MOVW $PADDR(MACHADDR), R1 - MOVW $PADDR(KTZERO), R2 -_ramZ: - MOVW R0, (R1) - ADD $4, R1 - CMP R1, R2 - BNE _ramZ - - /* * start stack at top of mach (physical addr) - * set up page tables for kernel */ MOVW $PADDR(MACHADDR+MACHSIZE-4), R13 - BL ,mmuinit(SB) - - /* - * set up domain access control and page table base - */ - MOVW $Client, R1 - MCR CpSC, 0, R1, C(CpDAC), C(0) - MOVW $PADDR(L1), R1 - MCR CpSC, 0, R1, C(CpTTB), C(0) - - /* - * enable caches, mmu, and high vectors - */ - MRC CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl - ORR $(CpChv|CpCdcache|CpCicache|CpCmmu), R0 - MCR CpSC, 0, R0, C(CpCONTROL), C(0), CpMainctl - ISB - - /* - * switch SB, SP, and PC into KZERO space - */ - MOVW $setR12(SB), R12 - MOVW $(MACHADDR+MACHSIZE-4), R13 - MOVW $_startpg(SB), R15 - -TEXT _startpg(SB), 1, $-4 - - /* - * enable cycle counter - */ - MOVW $1, R1 - MCR CpSC, 0, R1, C(CpSPM), C(CpSPMperf), CpSPMctl /* - * call main and loop forever if it returns + * do arch-dependent startup (no return) */ - BL ,main(SB) + BL ,armstart(SB) B ,0(PC) - BL _div(SB) /* hack to load _div, etc. 
*/ - -TEXT fsrget(SB), 1, $-4 /* data fault status */ - MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRdata - RET - -TEXT ifsrget(SB), 1, $-4 /* instruction fault status */ - MRC CpSC, 0, R0, C(CpFSR), C(0), CpFSRinst - RET - -TEXT farget(SB), 1, $-4 /* fault address */ - MRC CpSC, 0, R0, C(CpFAR), C(0x0) - RET - -TEXT lcycles(SB), 1, $-4 - MRC CpSC, 0, R0, C(CpSPM), C(CpSPMperf), CpSPMcyc - RET - -TEXT splhi(SB), 1, $-4 - MOVW $(MACHADDR+4), R2 /* save caller pc in Mach */ - MOVW R14, 0(R2) - - MOVW CPSR, R0 /* turn off irqs (but not fiqs) */ - ORR $(PsrDirq), R0, R1 - MOVW R1, CPSR - RET - -TEXT splfhi(SB), 1, $-4 - MOVW $(MACHADDR+4), R2 /* save caller pc in Mach */ - MOVW R14, 0(R2) - - MOVW CPSR, R0 /* turn off irqs and fiqs */ - ORR $(PsrDirq|PsrDfiq), R0, R1 - MOVW R1, CPSR - RET - -TEXT splflo(SB), 1, $-4 - MOVW CPSR, R0 /* turn on fiqs */ - BIC $(PsrDfiq), R0, R1 - MOVW R1, CPSR - RET - -TEXT spllo(SB), 1, $-4 - MOVW CPSR, R0 /* turn on irqs and fiqs */ - BIC $(PsrDirq|PsrDfiq), R0, R1 - MOVW R1, CPSR - RET - -TEXT splx(SB), 1, $-4 - MOVW $(MACHADDR+0x04), R2 /* save caller pc in Mach */ - MOVW R14, 0(R2) - - MOVW R0, R1 /* reset interrupt level */ - MOVW CPSR, R0 - MOVW R1, CPSR - RET - -TEXT spldone(SB), 1, $0 /* end marker for devkprof.c */ - RET - -TEXT islo(SB), 1, $-4 - MOVW CPSR, R0 - AND $(PsrDirq), R0 - EOR $(PsrDirq), R0 - RET - -TEXT tas(SB), $-4 -TEXT _tas(SB), $-4 - MOVW R0,R1 - MOVW $1,R0 - SWPW R0,(R1) /* fix: deprecated in armv6 */ - RET - -TEXT setlabel(SB), 1, $-4 - MOVW R13, 0(R0) /* sp */ - MOVW R14, 4(R0) /* pc */ - MOVW $0, R0 - RET - -TEXT gotolabel(SB), 1, $-4 - MOVW 0(R0), R13 /* sp */ - MOVW 4(R0), R14 /* pc */ - MOVW $1, R0 - RET - -TEXT getcallerpc(SB), 1, $-4 - MOVW 0(R13), R0 - RET - -TEXT idlehands(SB), $-4 - BARRIERS - MOVW CPSR, R3 - BIC $(PsrDirq|PsrDfiq), R3, R1 /* spllo */ - MOVW R1, CPSR - - MOVW $0, R0 /* wait for interrupt */ - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEintr), CpCACHEwait - ISB - - MOVW R3, CPSR /* splx */ - RET - 
- -TEXT coherence(SB), $-4 - BARRIERS - RET - -/* - * invalidate tlb - */ -TEXT mmuinvalidate(SB), 1, $-4 - MOVW $0, R0 - MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv - BARRIERS - RET - -/* - * mmuinvalidateaddr(va) - * invalidate tlb entry for virtual page address va, ASID 0 - */ -TEXT mmuinvalidateaddr(SB), 1, $-4 - MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinvse - BARRIERS - RET - -/* - * drain write buffer - * writeback and invalidate data cache - */ -TEXT cachedwbinv(SB), 1, $-4 - DSB - MOVW $0, R0 - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall - RET - -/* - * cachedwbinvse(va, n) - * drain write buffer - * writeback and invalidate data cache range [va, va+n) - */ -TEXT cachedwbinvse(SB), 1, $-4 - MOVW R0, R1 /* DSB clears R0 */ - DSB - MOVW n+4(FP), R2 - ADD R1, R2 - SUB $1, R2 - BIC $(CACHELINESZ-1), R1 - BIC $(CACHELINESZ-1), R2 - MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwbi) - RET - -/* - * cachedwbse(va, n) - * drain write buffer - * writeback data cache range [va, va+n) - */ -TEXT cachedwbse(SB), 1, $-4 - MOVW R0, R1 /* DSB clears R0 */ - DSB - MOVW n+4(FP), R2 - ADD R1, R2 - BIC $(CACHELINESZ-1), R1 - BIC $(CACHELINESZ-1), R2 - MCRR(CpSC, 0, 2, 1, CpCACHERANGEdwb) - RET - -/* - * drain write buffer and prefetch buffer - * writeback and invalidate data cache - * invalidate instruction cache - */ -TEXT cacheuwbinv(SB), 1, $-4 - BARRIERS - MOVW $0, R0 - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall - RET - -/* - * invalidate instruction cache - */ -TEXT cacheiinv(SB), 1, $-4 - MOVW $0, R0 - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall RET diff --git a/sys/src/9/bcm/lexception.s b/sys/src/9/bcm/lexception.s index d9653ecd7..bb1693d7b 100644 --- a/sys/src/9/bcm/lexception.s +++ b/sys/src/9/bcm/lexception.s @@ -27,6 +27,7 @@ TEXT vtable(SB), 1, $-4 WORD $_vfiq(SB) /* FIQ, switch to svc mode */ TEXT _vsvc(SB), 1, $-4 /* SWI */ + CLREX MOVW.W R14, -4(R13) /* ureg->pc = 
interrupted PC */ MOVW SPSR, R14 /* ureg->psr = SPSR */ MOVW.W R14, -4(R13) /* ... */ @@ -39,9 +40,16 @@ TEXT _vsvc(SB), 1, $-4 /* SWI */ MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */ -// MOVW $(KSEG0+16*KiB-MACHSIZE), R10 /* m */ - MOVW $(MACHADDR), R10 /* m */ - MOVW 8(R10), R9 /* up */ + /* get R(MACH) for this cpu */ + CPUID(R1) + SLL $2, R1 /* convert to word index */ + MOVW $machaddr(SB), R2 + ADD R1, R2 + MOVW (R2), R(MACH) /* m = machaddr[cpuid] */ + CMP $0, R(MACH) + MOVW.EQ $MACHADDR, R(MACH) /* paranoia: use MACHADDR if 0 */ + + MOVW 8(R(MACH)), R(USER) /* up */ MOVW R13, R0 /* first arg is pointer to ureg */ SUB $8, R13 /* space for argument+link */ @@ -81,6 +89,7 @@ TEXT _virq(SB), 1, $-4 /* IRQ */ * we'll switch to SVC mode and then call trap. */ _vswitch: + CLREX MOVW SPSR, R1 /* save SPSR for ureg */ MOVW R14, R2 /* save interrupted pc for ureg */ MOVW R13, R3 /* save pointer to where the original [R0-R4] are */ @@ -119,7 +128,16 @@ _vswitch: BL trap(SB) + MOVW $setR12(SB), R12 /* reload kernel's SB (ORLY?) */ ADD $(4*2+4*15), R13 /* make r13 point to ureg->type */ + /* + * if we interrupted a previous trap's handler and are now + * returning to it, we need to propagate the current R(MACH) (R10) + * by overriding the saved one on the stack, since we may have + * been rescheduled and be on a different processor now than + * at entry. + */ + MOVW R(MACH), (-(15-MACH)*4)(R13) /* restore current cpu's MACH */ MOVW 8(R13), R14 /* restore link */ MOVW 4(R13), R0 /* restore SPSR */ MOVW R0, SPSR /* ... 
*/ @@ -140,9 +158,16 @@ _userexcep: MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */ -// MOVW $(KSEG0+16*KiB-MACHSIZE), R10 /* m */ - MOVW $(MACHADDR), R10 /* m */ - MOVW 8(R10), R9 /* up */ + /* get R(MACH) for this cpu */ + CPUID(R1) + SLL $2, R1 /* convert to word index */ + MOVW $machaddr(SB), R2 + ADD R1, R2 + MOVW (R2), R(MACH) /* m = machaddr[cpuid] */ + CMP $0, R(MACH) + MOVW.EQ $MACHADDR, R(MACH) /* paranoia: use MACHADDR if 0 */ + + MOVW 8(R(MACH)), R(USER) /* up */ MOVW R13, R0 /* first arg is pointer to ureg */ SUB $(4*2), R13 /* space for argument+link (for debugger) */ @@ -158,14 +183,24 @@ _userexcep: RFE /* MOVM.IA.S.W (R13), [R15] */ TEXT _vfiq(SB), 1, $-4 /* FIQ */ + CLREX MOVW $PsrMfiq, R8 /* trap type */ MOVW SPSR, R9 /* interrupted psr */ MOVW R14, R10 /* interrupted pc */ MOVM.DB.W [R8-R10], (R13) /* save in ureg */ - MOVM.DB.W.S [R0-R14], (R13) /* save interrupted regs */ + MOVM.DB.S [R0-R14], (R13) /* save interrupted regs */ + SUB $(15*4), R13 MOVW $setR12(SB), R12 /* Make sure we've got the kernel's SB loaded */ - MOVW $(MACHADDR), R10 /* m */ - MOVW 8(R10), R9 /* up */ + /* get R(MACH) for this cpu */ + CPUID(R1) + SLL $2, R1 /* convert to word index */ + MOVW $machaddr(SB), R2 + ADD R1, R2 + MOVW (R2), R(MACH) /* m = machaddr[cpuid] */ + CMP $0, R(MACH) + MOVW.EQ $MACHADDR, R(MACH) /* paranoia: use MACHADDR if 0 */ + + MOVW 8(R(MACH)), R(USER) /* up */ MOVW R13, R0 /* first arg is pointer to ureg */ SUB $(4*2), R13 /* space for argument+link (for debugger) */ @@ -187,6 +222,7 @@ TEXT setr13(SB), 1, $-4 MOVW CPSR, R2 BIC $PsrMask, R2, R3 + ORR $(PsrDirq|PsrDfiq), R3 ORR R0, R3 MOVW R3, CPSR /* switch to new mode */ diff --git a/sys/src/9/bcm/main.c b/sys/src/9/bcm/main.c index 4167f2b6f..54956a3d0 100644 --- a/sys/src/9/bcm/main.c +++ b/sys/src/9/bcm/main.c @@ -4,6 +4,7 @@ #include "mem.h" #include "dat.h" #include "fns.h" +#include "io.h" #include "init.h" #include <pool.h> @@ -191,14 +192,27 @@ ataginit(Atag *a) 
void machinit(void) { + Mach *m0; + + m->ticks = 1; + m->perf.period = 1; + m0 = MACHP(0); + if (m->machno != 0) { + /* synchronise with cpu 0 */ + m->ticks = m0->ticks; + } +} + +void +mach0init(void) +{ + m->mmul1 = (PTE*)L1; m->machno = 0; machaddr[m->machno] = m; m->ticks = 1; m->perf.period = 1; - conf.nmach = 1; - active.machs[0] = 1; active.exiting = 0; @@ -206,6 +220,32 @@ machinit(void) } static void +launchinit(void) +{ + int mach; + Mach *mm; + PTE *l1; + + for(mach = 1; mach < conf.nmach; mach++){ + machaddr[mach] = mm = mallocalign(MACHSIZE, MACHSIZE, 0, 0); + l1 = mallocalign(L1SIZE, L1SIZE, 0, 0); + if(mm == nil || l1 == nil) + panic("launchinit"); + memset(mm, 0, MACHSIZE); + mm->machno = mach; + + memmove(l1, m->mmul1, L1SIZE); /* clone cpu0's l1 table */ + cachedwbse(l1, L1SIZE); + mm->mmul1 = l1; + cachedwbse(mm, MACHSIZE); + + } + cachedwbse(machaddr, sizeof machaddr); + if((mach = startcpus(conf.nmach)) < conf.nmach) + print("only %d cpu%s started\n", mach, mach == 1? 
"" : "s"); +} + +static void optionsinit(char* s) { strecpy(oargb, oargb+sizeof(oargb), s); @@ -216,29 +256,14 @@ optionsinit(char* s) } void -gpiomeminit(void) -{ - Physseg seg; - memset(&seg, 0, sizeof seg); - seg.attr = SG_PHYSICAL; - seg.name = "gpio"; - seg.pa = (VIRTIO+0x200000); - seg.size = BY2PG; - addphysseg(&seg); -} - - -void main(void) { extern char edata[], end[]; - uint rev; + uint fw, board; - okay(1); m = (Mach*)MACHADDR; memset(edata, 0, end - edata); /* clear bss */ - machinit(); - mmuinit1(); + mach0init(); optionsinit("/boot/boot boot"); quotefmtinstall(); @@ -250,20 +275,24 @@ main(void) screeninit(); print("\nPlan 9 from Bell Labs\n"); - rev = getfirmware(); - print("firmware: rev %d\n", rev); - if(rev < Minfirmrev){ - print("Sorry, firmware (start.elf) must be at least rev %d (%s)\n", - Minfirmrev, Minfirmdate); + board = getboardrev(); + fw = getfirmware(); + print("board rev: %#ux firmware rev: %d\n", board, fw); + if(fw < Minfirmrev){ + print("Sorry, firmware (start*.elf) must be at least rev %d" + " or newer than %s\n", Minfirmrev, Minfirmdate); for(;;) ; } + /* set clock rate to arm_freq from config.txt (default pi1:700Mhz pi2:900MHz) */ + setclkrate(ClkArm, 0); trapinit(); clockinit(); printinit(); timersinit(); cpuidprint(); archreset(); + vgpinit(); procinit0(); initseg(); @@ -271,7 +300,8 @@ main(void) chandevreset(); /* most devices are discovered here */ pageinit(); userinit(); - gpiomeminit(); + launchinit(); + mmuinit1(); schedinit(); assert(0); /* shouldn't have returned */ } @@ -484,8 +514,7 @@ confinit(void) conf.upages = conf.npage - kpages; conf.ialloc = (kpages/2)*BY2PG; - /* only one processor */ - conf.nmach = 1; + conf.nmach = getncpus(); /* set up other configuration parameters */ conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5; @@ -497,7 +526,7 @@ confinit(void) conf.nswppo = 4096; conf.nimage = 200; - conf.copymode = 0; /* copy on write */ + conf.copymode = conf.nmach > 1; /* * Guess how much is taken by the large 
permanent @@ -529,6 +558,14 @@ exit(int) { cpushutdown(); splfhi(); + if(m->machno != 0){ + void (*f)(ulong, ulong, ulong) = (void*)REBOOTADDR; + intrsoff(); + intrcpushutdown(); + cacheuwbinv(); + (*f)(0, 0, 0); + for(;;); + } archreboot(); } @@ -536,11 +573,9 @@ exit(int) * stub for ../omap/devether.c */ int -isaconfig(char *class, int ctlrno, ISAConf *isa) +isaconfig(char *, int, ISAConf *) { - USED(ctlrno); - USED(isa); - return strcmp(class, "ether") == 0; + return 0; } /* @@ -553,37 +588,39 @@ reboot(void *entry, void *code, ulong size) void (*f)(ulong, ulong, ulong); writeconf(); + if (m->machno != 0) { + procwired(up, 0); + sched(); + } + + /* setup reboot trampoline function */ + f = (void*)REBOOTADDR; + memmove(f, rebootcode, sizeof(rebootcode)); + cachedwbse(f, sizeof(rebootcode)); + cpushutdown(); + delay(500); + + splfhi(); /* turn off buffered serial console */ serialoq = nil; - kprintoq = nil; - screenputs = nil; /* shutdown devices */ chandevshutdown(); /* stop the clock (and watchdog if any) */ clockshutdown(); - - splfhi(); intrsoff(); + intrcpushutdown(); - /* setup reboot trampoline function */ - f = (void*)REBOOTADDR; - memmove(f, rebootcode, sizeof(rebootcode)); cacheuwbinv(); + l2cacheuwbinv(); /* off we go - never to return */ (*f)(PADDR(entry), PADDR(code), size); } -int -cmpswap(long *addr, long old, long new) -{ - return cas32(addr, old, new); -} - void setupwatchpts(Proc *, Watchpt *, int n) { diff --git a/sys/src/9/bcm/mem.h b/sys/src/9/bcm/mem.h index 8270fc6ba..f9b6af838 100644 --- a/sys/src/9/bcm/mem.h +++ b/sys/src/9/bcm/mem.h @@ -5,27 +5,31 @@ #define MiB 1048576u /* Mebi 0x0000000000100000 */ #define GiB 1073741824u /* Gibi 000000000040000000 */ -#define HOWMANY(x, y) (((x)+((y)-1))/(y)) -#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) /* ceiling */ -#define ROUNDDN(x, y) (((x)/(y))*(y)) /* floor */ -#define MIN(a, b) ((a) < (b)? (a): (b)) -#define MAX(a, b) ((a) > (b)? 
(a): (b)) - /* * Sizes */ #define BY2PG (4*KiB) /* bytes per page */ #define PGSHIFT 12 /* log(BY2PG) */ +#define HOWMANY(x,y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x,y) (HOWMANY((x),(y))*(y)) #define PGROUND(s) ROUNDUP(s, BY2PG) #define ROUND(s, sz) (((s)+(sz-1))&~(sz-1)) -#define MAXMACH 1 /* max # cpus system can run */ +#define MAXMACH 4 /* max # cpus system can run */ #define MACHSIZE BY2PG +#define L1SIZE (4 * BY2PG) #define KSTKSIZE (8*KiB) #define STACKALIGN(sp) ((sp) & ~3) /* bug: assure with alloc */ /* + * Magic registers + */ + +#define USER 9 /* R9 is up-> */ +#define MACH 10 /* R10 is m-> */ + +/* * Address spaces. * KTZERO is used by kprof and dumpstack (if any). * @@ -36,8 +40,8 @@ */ #define KSEG0 0x80000000 /* kernel segment */ -/* mask to check segment; good for 512MB dram */ -#define KSEGM 0xE0000000 +/* mask to check segment; good for 1GB dram */ +#define KSEGM 0xC0000000 #define KZERO KSEG0 /* kernel address space */ #define CONFADDR (KZERO+0x100) /* unparsed plan9.ini */ #define MACHADDR (KZERO+0x2000) /* Mach structure */ @@ -47,20 +51,24 @@ #define L1 (KZERO+0x4000) /* tt ptes: 16KiB aligned */ #define KTZERO (KZERO+0x8000) /* kernel text start */ #define VIRTIO 0x7E000000 /* i/o registers */ -#define FRAMEBUFFER 0xA0000000 /* video framebuffer */ +#define ARMLOCAL (VIRTIO+IOSIZE) /* armv7 only */ +#define VGPIO (ARMLOCAL+MiB) /* virtual gpio for pi3 ACT LED */ +#define FRAMEBUFFER 0xC0000000 /* video framebuffer */ #define UZERO 0 /* user segment */ #define UTZERO (UZERO+BY2PG) /* user text start */ -#define USTKTOP 0x20000000 /* user segment end +1 */ +#define USTKTOP 0x40000000 /* user segment end +1 */ #define USTKSIZE (8*1024*1024) /* user stack size */ +#define TSTKTOP (USTKTOP-USTKSIZE) /* sysexec temporary stack */ +#define TSTKSIZ 256 /* address at which to copy and execute rebootcode */ -#define REBOOTADDR (KZERO+0x3400) +#define REBOOTADDR (KZERO+0x1800) /* * Legacy... 
*/ -#define BLOCKALIGN 32 /* only used in allocb.c */ +#define BLOCKALIGN 64 /* only used in allocb.c */ #define KSTACK KSTKSIZE /* @@ -71,7 +79,6 @@ #define BY2WD 4 #define BY2V 8 /* only used in xalloc.c */ -#define CACHELINESZ 32 #define PTEMAPMEM (1024*1024) #define PTEPERTAB (PTEMAPMEM/BY2PG) #define SEGMAPSIZE 1984 @@ -93,8 +100,7 @@ * BUS addresses as seen from the videocore gpu. */ #define PHYSDRAM 0 -#define BUSDRAM 0x40000000 -#define DRAMSIZE (512*MiB) -#define PHYSIO 0x20000000 -#define BUSIO 0x7E000000 #define IOSIZE (16*MiB) + +#define MIN(a, b) ((a) < (b)? (a): (b)) +#define MAX(a, b) ((a) > (b)? (a): (b)) diff --git a/sys/src/9/bcm/mkfile b/sys/src/9/bcm/mkfile index e2bd4c170..a9673a7ce 100644 --- a/sys/src/9/bcm/mkfile +++ b/sys/src/9/bcm/mkfile @@ -1,7 +1,7 @@ -CONF=pif -CONFLIST=pif picpuf +CONF=pi2 +CONFLIST=pi pi2 +CRAPLIST=pif picpuf EXTRACOPIES= -#EXTRACOPIES=''piestand lookout boundary # bovril loadaddr=0x80008000 @@ -104,12 +104,13 @@ install:V: /$objtype/$p$CONF arch.$O clock.$O fpiarm.$O main.$O mmu.$O screen.$O syscall.$O trap.$O: \ /$objtype/include/ureg.h -archbcm.$O: ../port/flashif.h fpi.$O fpiarm.$O fpimem.$O: fpi.h -l.$O lexception.$O lproc.$O mmu.$O: arm.s mem.h +l.$O lexception.$O lproc.$O mmu.$O: mem.h +l.$O lexception.$O lproc.$O armv6.$O armv7.$O: arm.s +armv7.$O: cache.v7.s main.$O: errstr.h init.h reboot.h devmouse.$O mouse.$O screen.$O: screen.h -devusb.$O: ../port/usb.h +usbdwc.$O: dwcotg.h ../port/usb.h init.h:D: ../port/initcode.c init9.s $CC ../port/initcode.c @@ -123,11 +124,12 @@ init.h:D: ../port/initcode.c init9.s reboot.h:D: rebootcode.s arm.s arm.h mem.h $AS rebootcode.s # -lc is only for memmove. 
-T arg is PADDR(REBOOTADDR) - $LD -l -s -T0x3400 -R4 -o reboot.out rebootcode.$O -lc + $LD -l -s -T0x1800 -R4 -o reboot.out rebootcode.$O -lc {echo 'uchar rebootcode[]={' xd -1x reboot.out | sed -e '1,2d' -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' echo '};'} > reboot.h + errstr.h:D: ../port/mkerrstr ../port/error.h rc ../port/mkerrstr > errstr.h diff --git a/sys/src/9/bcm/mmu.c b/sys/src/9/bcm/mmu.c index 3ccf38ffc..af647285f 100644 --- a/sys/src/9/bcm/mmu.c +++ b/sys/src/9/bcm/mmu.c @@ -9,66 +9,75 @@ #define FEXT(d, o, w) (((d)>>(o)) & ((1<<(w))-1)) #define L1X(va) FEXT((va), 20, 12) #define L2X(va) FEXT((va), 12, 8) +#define L2AP(ap) l2ap(ap) +#define L1ptedramattrs soc.l1ptedramattrs +#define L2ptedramattrs soc.l2ptedramattrs enum { L1lo = UZERO/MiB, /* L1X(UZERO)? */ L1hi = (USTKTOP+MiB-1)/MiB, /* L1X(USTKTOP+MiB-1)? */ + L2size = 256*sizeof(PTE), }; +/* + * Set up initial PTEs for cpu0 (called with mmu off) + */ void -mmuinit(void) +mmuinit(void *a) { PTE *l1, *l2; uintptr pa, va; - l1 = (PTE*)PADDR(L1); + l1 = (PTE*)a; l2 = (PTE*)PADDR(L2); /* * map all of ram at KZERO */ va = KZERO; - for(pa = PHYSDRAM; pa < PHYSDRAM+DRAMSIZE; pa += MiB){ - l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section|Cached|Buffered; + for(pa = PHYSDRAM; pa < PHYSDRAM+soc.dramsize; pa += MiB){ + l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section|L1ptedramattrs; va += MiB; } /* * identity map first MB of ram so mmu can be enabled */ - l1[L1X(PHYSDRAM)] = PHYSDRAM|Dom0|L1AP(Krw)|Section|Cached|Buffered; + l1[L1X(PHYSDRAM)] = PHYSDRAM|Dom0|L1AP(Krw)|Section|L1ptedramattrs; /* * map i/o registers */ va = VIRTIO; - for(pa = PHYSIO; pa < PHYSIO+IOSIZE; pa += MiB){ + for(pa = soc.physio; pa < soc.physio+IOSIZE; pa += MiB){ l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section; va += MiB; } - + pa = soc.armlocal; + if(pa) + l1[L1X(va)] = pa|Dom0|L1AP(Krw)|Section; + /* - * double map exception vectors at top of virtual memory + * double map exception vectors near top of virtual memory */ va = HVECTORS; l1[L1X(va)] 
= (uintptr)l2|Dom0|Coarse; - l2[L2X(va)] = PHYSDRAM|L2AP(Krw)|Small; + l2[L2X(va)] = PHYSDRAM|L2AP(Krw)|Small|L2ptedramattrs; } void -mmuinit1(void) +mmuinit1() { PTE *l1; - l1 = (PTE*)L1; - m->mmul1 = l1; + l1 = m->mmul1; /* * undo identity map of first MB of ram */ l1[L1X(PHYSDRAM)] = 0; - cachedwbse(&l1[L1X(PHYSDRAM)], sizeof(PTE)); - mmuinvalidate(); + cachedwbtlb(&l1[L1X(PHYSDRAM)], sizeof(PTE)); + mmuinvalidateaddr(PHYSDRAM); } static void @@ -81,10 +90,11 @@ mmul2empty(Proc* proc, int clear) l2 = &proc->mmul2; for(page = *l2; page != nil; page = page->next){ if(clear) - memset(UINT2PTR(page->va), 0, BY2PG); + memset(UINT2PTR(page->va), 0, L2size); l1[page->daddr] = Fault; l2 = &page->next; } + coherence(); *l2 = proc->mmul2cache; proc->mmul2cache = proc->mmul2; proc->mmul2 = nil; @@ -93,29 +103,24 @@ mmul2empty(Proc* proc, int clear) static void mmul1empty(void) { -#ifdef notdef -/* there's a bug in here */ PTE *l1; /* clean out any user mappings still in l1 */ - if(m->mmul1lo > L1lo){ + if(m->mmul1lo > 0){ if(m->mmul1lo == 1) m->mmul1[L1lo] = Fault; else memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE)); - m->mmul1lo = L1lo; + m->mmul1lo = 0; } - if(m->mmul1hi < L1hi){ - l1 = &m->mmul1[m->mmul1hi]; - if((L1hi - m->mmul1hi) == 1) + if(m->mmul1hi > 0){ + l1 = &m->mmul1[L1hi - m->mmul1hi]; + if(m->mmul1hi == 1) *l1 = Fault; else - memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE)); - m->mmul1hi = L1hi; + memset(l1, 0, m->mmul1hi*sizeof(PTE)); + m->mmul1hi = 0; } -#else - memset(&m->mmul1[L1lo], 0, (L1hi - L1lo)*sizeof(PTE)); -#endif /* notdef */ } void @@ -125,15 +130,7 @@ mmuswitch(Proc* proc) PTE *l1; Page *page; - /* do kprocs get here and if so, do they need to? 
*/ - if(m->mmupid == proc->pid && !proc->newtlb) - return; - m->mmupid = proc->pid; - - /* write back dirty and invalidate l1 caches */ - cacheuwbinv(); - - if(proc->newtlb){ + if(proc != nil && proc->newtlb){ mmul2empty(proc, 1); proc->newtlb = 0; } @@ -142,19 +139,21 @@ mmuswitch(Proc* proc) /* move in new map */ l1 = m->mmul1; + if(proc != nil) for(page = proc->mmul2; page != nil; page = page->next){ x = page->daddr; l1[x] = PPN(page->pa)|Dom0|Coarse; - /* know here that L1lo < x < L1hi */ - if(x+1 - m->mmul1lo < m->mmul1hi - x) - m->mmul1lo = x+1; - else - m->mmul1hi = x; + if(x >= L1lo + m->mmul1lo && x < L1hi - m->mmul1hi){ + if(x+1 - L1lo < L1hi - x) + m->mmul1lo = x+1 - L1lo; + else + m->mmul1hi = L1hi - x; + } } /* make sure map is in memory */ /* could be smarter about how much? */ - cachedwbse(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE)); + cachedwbtlb(&l1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE)); /* lose any possible stale tlb entries */ mmuinvalidate(); @@ -176,9 +175,6 @@ mmurelease(Proc* proc) { Page *page, *next; - /* write back dirty and invalidate l1 caches */ - cacheuwbinv(); - mmul2empty(proc, 0); for(page = proc->mmul2cache; page != nil; page = next){ next = page->next; @@ -194,7 +190,7 @@ mmurelease(Proc* proc) /* make sure map is in memory */ /* could be smarter about how much? 
*/ - cachedwbse(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE)); + cachedwbtlb(&m->mmul1[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE)); /* lose any possible stale tlb entries */ mmuinvalidate(); @@ -203,39 +199,45 @@ mmurelease(Proc* proc) void putmmu(uintptr va, uintptr pa, Page* page) { - int x; + int x, s; Page *pg; PTE *l1, *pte; + /* + * disable interrupts to prevent flushmmu (called from hzclock) + * from clearing page tables while we are setting them + */ + s = splhi(); x = L1X(va); l1 = &m->mmul1[x]; if(*l1 == Fault){ - /* wasteful - l2 pages only have 256 entries - fix */ + /* l2 pages only have 256 entries - wastes 3K per 1M of address space */ if(up->mmul2cache == nil){ - /* auxpg since we don't need much? memset if so */ + spllo(); pg = newpage(1, 0, 0); + splhi(); + /* if newpage slept, we might be on a different cpu */ + l1 = &m->mmul1[x]; pg->va = VA(kmap(pg)); - } - else{ + }else{ pg = up->mmul2cache; up->mmul2cache = pg->next; - memset(UINT2PTR(pg->va), 0, BY2PG); } pg->daddr = x; pg->next = up->mmul2; up->mmul2 = pg; - /* force l2 page to memory */ - cachedwbse((void *)pg->va, BY2PG); + /* force l2 page to memory (armv6) */ + cachedwbtlb((void *)pg->va, L2size); *l1 = PPN(pg->pa)|Dom0|Coarse; - cachedwbse(l1, sizeof *l1); + cachedwbtlb(l1, sizeof *l1); - if(x >= m->mmul1lo && x < m->mmul1hi){ - if(x+1 - m->mmul1lo < m->mmul1hi - x) - m->mmul1lo = x+1; + if(x >= L1lo + m->mmul1lo && x < L1hi - m->mmul1hi){ + if(x+1 - L1lo < L1hi - x) + m->mmul1lo = x+1 - L1lo; else - m->mmul1hi = x; + m->mmul1hi = L1hi - x; } } pte = UINT2PTR(KADDR(PPN(*l1))); @@ -247,29 +249,51 @@ putmmu(uintptr va, uintptr pa, Page* page) */ x = Small; if(!(pa & PTEUNCACHED)) - x |= Cached|Buffered; + x |= L2ptedramattrs; if(pa & PTEWRITE) x |= L2AP(Urw); else x |= L2AP(Uro); pte[L2X(va)] = PPN(pa)|x; - cachedwbse(&pte[L2X(va)], sizeof pte[0]); + cachedwbtlb(&pte[L2X(va)], sizeof(PTE)); /* clear out the current entry */ mmuinvalidateaddr(PPN(va)); - /* write back dirty entries - we 
need this because the pio() in - * fault.c is writing via a different virt addr and won't clean - * its changes out of the dcache. Page coloring doesn't work - * on this mmu because the virtual cache is set associative - * rather than direct mapped. - */ - cachedwbinv(); - if(page->txtflush){ - cacheiinv(); - page->txtflush = 0; + if((page->txtflush & (1<<m->machno)) != 0){ + /* pio() sets PG_TXTFLUSH whenever a text pg has been written */ + cachedwbse((void*)(page->pa|KZERO), BY2PG); + cacheiinvse((void*)page->va, BY2PG); + page->txtflush &= ~(1<<m->machno); } - checkmmu(va, PPN(pa)); + //checkmmu(va, PPN(pa)); + splx(s); +} + +void* +mmuuncache(void* v, usize size) +{ + int x; + PTE *pte; + uintptr va; + + /* + * Simple helper for ucalloc(). + * Uncache a Section, must already be + * valid in the MMU. + */ + va = PTR2UINT(v); + assert(!(va & (1*MiB-1)) && size == 1*MiB); + + x = L1X(va); + pte = &m->mmul1[x]; + if((*pte & (Fine|Section|Coarse)) != Section) + return nil; + *pte &= ~L1ptedramattrs; + mmuinvalidateaddr(va); + cachedwbinvse(pte, 4); + + return v; } /* @@ -304,15 +328,31 @@ mmukmap(uintptr va, uintptr pa, usize size) *pte++ = (pa+n)|Dom0|L1AP(Krw)|Section; mmuinvalidateaddr(va+n); } - cachedwbse(pte0, (uintptr)pte - (uintptr)pte0); + cachedwbtlb(pte0, (uintptr)pte - (uintptr)pte0); return va + o; } - void checkmmu(uintptr va, uintptr pa) { - USED(va); - USED(pa); + int x; + PTE *l1, *pte; + + x = L1X(va); + l1 = &m->mmul1[x]; + if(*l1 == Fault){ + iprint("checkmmu cpu%d va=%lux l1 %p=%ux\n", m->machno, va, l1, *l1); + return; + } + pte = KADDR(PPN(*l1)); + pte += L2X(va); + if(pa == ~0 || (pa != 0 && PPN(*pte) != pa)) + iprint("checkmmu va=%lux pa=%lux l1 %p=%ux pte %p=%ux\n", va, pa, l1, *l1, pte, *pte); } +void +kunmap(KMap *k) +{ + USED(k); + coherence(); +} diff --git a/sys/src/9/bcm/pi b/sys/src/9/bcm/pi new file mode 100644 index 000000000..c5518e4bc --- /dev/null +++ b/sys/src/9/bcm/pi @@ -0,0 +1,55 @@ +dev + root + cons + swap + env + pipe + 
proc + mnt + srv + shr + swap + dup + arch + ssl + tls + cap + fs + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno + draw screen swcursor + mouse mouse + uart gpio + gpio gpio + sd + usb + +link + loopbackmedium + ethermedium + archbcm + usbdwc + +ip + tcp + udp + ipifc + icmp + icmp6 + ipmux + +misc + armv6 + uartmini +# sdmmc emmc + dma + vcore + vfp3 coproc + +port + int cpuserver = 0; + +bootdir + /$objtype/bin/paqfs + /$objtype/bin/auth/factotum + bootfs.paq + boot diff --git a/sys/src/9/bcm/pif b/sys/src/9/bcm/pi2 index 45f4c4394..c7a1978c2 100644 --- a/sys/src/9/bcm/pif +++ b/sys/src/9/bcm/pi2 @@ -8,6 +8,7 @@ dev mnt srv shr + swap dup arch ssl @@ -17,16 +18,15 @@ dev ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno draw screen swcursor mouse mouse - uart - gpio + uart gpio + gpio gpio sd usb link - ethermedium loopbackmedium - netdevmedium - archbcm + ethermedium + archbcm2 usbdwc ip @@ -36,9 +36,9 @@ ip icmp icmp6 ipmux - il misc + armv7 uartmini sdmmc emmc dma @@ -53,4 +53,3 @@ bootdir /$objtype/bin/auth/factotum bootfs.paq boot - diff --git a/sys/src/9/bcm/rebootcode.s b/sys/src/9/bcm/rebootcode.s index 8e924ccfd..a685debb3 100644 --- a/sys/src/9/bcm/rebootcode.s +++ b/sys/src/9/bcm/rebootcode.s @@ -1,8 +1,13 @@ /* - * armv6 reboot code + * armv6/armv7 reboot code */ #include "arm.s" +#define PTEDRAM (Dom0|L1AP(Krw)|Section) + +#define WFI WORD $0xe320f003 /* wait for interrupt */ +#define WFE WORD $0xe320f002 /* wait for event */ + /* * Turn off MMU, then copy the new kernel to its correct location * in physical memory. Then jump to the start of the kernel. 
@@ -15,7 +20,7 @@ TEXT main(SB), 1, $-4 /* copy in arguments before stack gets unmapped */ MOVW R0, R8 /* entry point */ MOVW p2+4(FP), R9 /* source */ - MOVW n+8(FP), R10 /* byte count */ + MOVW n+8(FP), R6 /* byte count */ /* SVC mode, interrupts disabled */ MOVW $(PsrDirq|PsrDfiq|PsrMsvc), R1 @@ -29,6 +34,28 @@ TEXT main(SB), 1, $-4 BIC $CpCmmu, R1 MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + /* continue with reboot only on cpu0 */ + CPUID(R2) + BEQ bootcpu + + /* other cpus wait for inter processor interrupt from cpu0 */ + /* turn icache back on */ + MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + ORR $(CpCicache), R1 + MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + BARRIERS +dowfi: + WFI + MOVW $0x40000060, R1 + ADD R2<<2, R1 + MOVW 0(R1), R0 + AND $0x10, R0 + BEQ dowfi + MOVW $0x8000, R1 + BL (R1) + B dowfi + +bootcpu: /* set up a tiny stack for local vars and memmove args */ MOVW R8, SP /* stack top just before kernel dest */ SUB $20, SP /* allocate stack frame */ @@ -37,11 +64,12 @@ TEXT main(SB), 1, $-4 MOVW R8, 16(SP) /* save dest (entry point) */ MOVW R8, R0 /* first arg is dest */ MOVW R9, 8(SP) /* push src */ - MOVW R10, 12(SP) /* push size */ + MOVW R6, 12(SP) /* push size */ BL memmove(SB) MOVW 16(SP), R8 /* restore entry point */ /* jump to kernel physical entry point */ + ORR R8,R8 B (R8) B 0(PC) @@ -51,43 +79,40 @@ TEXT main(SB), 1, $-4 * clobbers R0-R2, and returns with SP invalid. 
*/ TEXT cachesoff(SB), 1, $-4 + MOVM.DB.W [R14,R1-R10], (R13) /* save regs on stack */ - /* write back and invalidate caches */ - BARRIERS - MOVW $0, R0 - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEwbi), CpCACHEall - MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall - - /* turn caches off */ + /* turn caches off, invalidate icache */ MRC CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl BIC $(CpCdcache|CpCicache|CpCpredict), R1 MCR CpSC, 0, R1, C(CpCONTROL), C(0), CpMainctl + MOVW $0, R0 + MCR CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEall /* invalidate stale TLBs before changing them */ BARRIERS - MOVW $KZERO, R0 /* some valid virtual address */ + MOVW $0, R0 MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv BARRIERS - /* from here on, R0 is base of physical memory */ - MOVW $PHYSDRAM, R0 - /* redo double map of first MiB PHYSDRAM = KZERO */ - MOVW $(L1+L1X(PHYSDRAM)), R2 /* address of PHYSDRAM's PTE */ + MOVW 12(R(MACH)), R2 /* m->mmul1 (virtual addr) */ MOVW $PTEDRAM, R1 /* PTE bits */ - ORR R0, R1 /* dram base */ MOVW R1, (R2) + DSB + MCR CpSC, 0, R2, C(CpCACHE), C(CpCACHEwb), CpCACHEse /* invalidate stale TLBs again */ BARRIERS + MOVW $0, R0 MCR CpSC, 0, R0, C(CpTLB), C(CpTLBinvu), CpTLBinv BARRIERS /* relocate SB and return address to PHYSDRAM addressing */ MOVW $KSEGM, R1 /* clear segment bits */ BIC R1, R12 /* adjust SB */ - ORR R0, R12 + MOVM.IA.W (R13), [R14,R1-R10] /* restore regs from stack */ + + MOVW $KSEGM, R1 /* clear segment bits */ BIC R1, R14 /* adjust return address */ - ORR R0, R14 RET diff --git a/sys/src/9/bcm/trap.c b/sys/src/9/bcm/trap.c index b6bf34389..ae6489cba 100644 --- a/sys/src/9/bcm/trap.c +++ b/sys/src/9/bcm/trap.c @@ -13,6 +13,7 @@ #include "arm.h" #define INTREGS (VIRTIO+0xB200) +#define LOCALREGS (VIRTIO+IOSIZE) typedef struct Intregs Intregs; typedef struct Vctl Vctl; @@ -20,6 +21,10 @@ typedef struct Vctl Vctl; enum { Nvec = 8, /* # of vectors at start of lexception.s */ Fiqenable = 1<<7, + + Localtimerint = 0x40, + 
Localmboxint = 0x50, + Localintpending = 0x60, }; /* @@ -46,12 +51,14 @@ struct Intregs { struct Vctl { Vctl *next; int irq; + int cpu; u32int *reg; u32int mask; void (*f)(Ureg*, void*); void *a; }; +static Lock vctllock; static Vctl *vctl, *vfiq; static char *trapnames[PsrMask+1] = { @@ -75,14 +82,16 @@ trapinit(void) { Vpage0 *vpage0; - /* disable everything */ - intrsoff(); - - /* set up the exception vectors */ - vpage0 = (Vpage0*)HVECTORS; - memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors)); - memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable)); - cacheuwbinv(); + if (m->machno == 0) { + /* disable everything */ + intrsoff(); + /* set up the exception vectors */ + vpage0 = (Vpage0*)HVECTORS; + memmove(vpage0->vectors, vectors, sizeof(vpage0->vectors)); + memmove(vpage0->vtable, vtable, sizeof(vpage0->vtable)); + cacheuwbinv(); + l2cacheuwbinv(); + } /* set up the stacks for the interrupt modes */ setr13(PsrMfiq, (u32int*)(FIQSTKTOP)); @@ -95,6 +104,21 @@ trapinit(void) } void +intrcpushutdown(void) +{ + u32int *enable; + + if(soc.armlocal == 0) + return; + enable = (u32int*)(LOCALREGS + Localtimerint) + m->machno; + *enable = 0; + if(m->machno){ + enable = (u32int*)(LOCALREGS + Localmboxint) + m->machno; + *enable = 1; + } +} + +void intrsoff(void) { Intregs *ip; @@ -120,11 +144,11 @@ irq(Ureg* ureg) clockintr = 0; for(v = vctl; v; v = v->next) - if(*v->reg & v->mask){ + if(v->cpu == m->machno && (*v->reg & v->mask) != 0){ coherence(); v->f(ureg, v->a); coherence(); - if(v->irq == IRQclock) + if(v->irq == IRQclock || v->irq == IRQcntps || v->irq == IRQcntpns) clockintr = 1; } return clockintr; @@ -140,7 +164,7 @@ fiq(Ureg *ureg) v = vfiq; if(v == nil) - panic("unexpected item in bagging area"); + panic("cpu%d: unexpected item in bagging area", m->machno); m->intr++; ureg->pc -= 4; coherence(); @@ -160,7 +184,16 @@ irqenable(int irq, void (*f)(Ureg*, void*), void* a) if(v == nil) panic("irqenable: no mem"); v->irq = irq; - if(irq >= IRQbasic){ + 
v->cpu = 0; + if(irq >= IRQlocal){ + v->reg = (u32int*)(LOCALREGS + Localintpending) + m->machno; + if(irq >= IRQmbox0) + enable = (u32int*)(LOCALREGS + Localmboxint) + m->machno; + else + enable = (u32int*)(LOCALREGS + Localtimerint) + m->machno; + v->mask = 1 << (irq - IRQlocal); + v->cpu = m->machno; + }else if(irq >= IRQbasic){ enable = &ip->ARMenable; v->reg = &ip->ARMpending; v->mask = 1 << (irq - IRQbasic); @@ -171,6 +204,7 @@ irqenable(int irq, void (*f)(Ureg*, void*), void* a) } v->f = f; v->a = a; + lock(&vctllock); if(irq == IRQfiq){ assert((ip->FIQctl & Fiqenable) == 0); assert((*enable & v->mask) == 0); @@ -179,8 +213,15 @@ irqenable(int irq, void (*f)(Ureg*, void*), void* a) }else{ v->next = vctl; vctl = v; - *enable = v->mask; + if(irq >= IRQmbox0){ + if(irq <= IRQmbox3) + *enable |= 1 << (irq - IRQmbox0); + }else if(irq >= IRQlocal) + *enable |= 1 << (irq - IRQlocal); + else + *enable = v->mask; } + unlock(&vctllock); } static char * diff --git a/sys/src/9/bcm/uartmini.c b/sys/src/9/bcm/uartmini.c index 11e542f55..4f5c9cd8a 100644 --- a/sys/src/9/bcm/uartmini.c +++ b/sys/src/9/bcm/uartmini.c @@ -10,35 +10,11 @@ #include "fns.h" #include "io.h" -#define GPIOREGS (VIRTIO+0x200000) #define AUXREGS (VIRTIO+0x215000) #define OkLed 16 #define TxPin 14 #define RxPin 15 -/* GPIO regs */ -enum { - Fsel0 = 0x00>>2, - FuncMask= 0x7, - Input = 0x0, - Output = 0x1, - Alt0 = 0x4, - Alt1 = 0x5, - Alt2 = 0x6, - Alt3 = 0x7, - Alt4 = 0x3, - Alt5 = 0x2, - Set0 = 0x1c>>2, - Clr0 = 0x28>>2, - Lev0 = 0x34>>2, - PUD = 0x94>>2, - Off = 0x0, - Pulldown= 0x1, - Pullup = 0x2, - PUDclk0 = 0x98>>2, - PUDclk1 = 0x9c>>2, -}; - /* AUX regs */ enum { Irq = 0x00>>2, @@ -73,56 +49,11 @@ static Uart miniuart = { .regs = (u32int*)AUXREGS, .name = "uart0", .freq = 250000000, + .baud = 115200, .phys = &miniphysuart, }; -void -gpiosel(uint pin, int func) -{ - u32int *gp, *fsel; - int off; - - gp = (u32int*)GPIOREGS; - fsel = &gp[Fsel0 + pin/10]; - off = (pin % 10) * 3; - *fsel = (*fsel & 
~(FuncMask << off)) | func << off; -} - -void -gpiopulloff(uint pin) -{ - u32int *gp, *reg; - u32int mask; - - gp = (u32int*)GPIOREGS; - reg = &gp[PUDclk0 + pin/32]; - mask = 1 << (pin % 32); - gp[PUD] = Off; - microdelay(1); - *reg = mask; - microdelay(1); - *reg = 0; -} - -void -gpioout(uint pin, int set) -{ - u32int *gp; - int v; - - gp = (u32int*)GPIOREGS; - v = set? Set0: Clr0; - gp[v + pin/32] = 1 << (pin % 32); -} - -int -gpioin(uint pin) -{ - u32int *gp; - - gp = (u32int*)GPIOREGS; - return (gp[Lev0 + pin/32] & (1 << (pin % 32))) != 0; -} +static int baud(Uart*, int); static void interrupt(Ureg*, void *arg) @@ -162,10 +93,12 @@ enable(Uart *uart, int ie) gpiosel(TxPin, Alt5); gpiosel(RxPin, Alt5); gpiopulloff(TxPin); - gpiopulloff(RxPin); + gpiopullup(RxPin); ap[Enables] |= UartEn; ap[MuIir] = 6; + ap[MuLcr] = Bits8; ap[MuCntl] = TxEn|RxEn; + baud(uart, uart->baud); if(ie){ intrenable(IRQaux, interrupt, uart, 0, "uart"); ap[MuIer] = RxIen|TxIen; @@ -370,12 +303,11 @@ uartconsinit(void) break; } - uartctl(uart, "b9600 l8 pn s1"); - if(*cmd != '\0') - uartctl(uart, cmd); - if(!uart->enabled) (*uart->phys->enable)(uart, 0); + uartctl(uart, "l8 pn s1"); + if(*cmd != '\0') + uartctl(uart, cmd); consuart = uart; uart->console = 1; @@ -405,8 +337,26 @@ void okay(int on) { static int first; + static int okled, polarity; + char *p; - if(!first++) - gpiosel(OkLed, Output); - gpioout(OkLed, !on); + if(!first++){ + p = getconf("bcm2709.disk_led_gpio"); + if(p == nil) + p = getconf("bcm2708.disk_led_gpio"); + if(p != nil) + okled = strtol(p, 0, 0); + else + okled = 'v'; + p = getconf("bcm2709.disk_led_active_low"); + if(p == nil) + p = getconf("bcm2708.disk_led_active_low"); + polarity = (p == nil || *p == '1'); + if(okled != 'v') + gpiosel(okled, Output); + } + if(okled == 'v') + vgpset(0, on); + else if(okled != 0) + gpioout(okled, on^polarity); } diff --git a/sys/src/9/bcm/usbdwc.c b/sys/src/9/bcm/usbdwc.c index a7292ba5f..b41da760b 100644 --- 
a/sys/src/9/bcm/usbdwc.c +++ b/sys/src/9/bcm/usbdwc.c @@ -33,16 +33,29 @@ enum Read = 0, Write = 1, + + /* + * Workaround for an unexplained glitch where an Ack interrupt + * is received without Chhltd, whereupon all channels remain + * permanently busy and can't be halted. This was only seen + * when the controller is reading a sequence of bulk input + * packets in DMA mode. Setting Slowbulkin=1 will avoid the + * lockup by reading packets individually with an interrupt + * after each. More recent chips don't seem to exhibit the + * problem, so it's probably safe to leave this off now. + */ + Slowbulkin = 0, }; typedef struct Ctlr Ctlr; typedef struct Epio Epio; struct Ctlr { + Lock; Dwcregs *regs; /* controller registers */ int nchan; /* number of host channels */ ulong chanbusy; /* bitmap of in-use channels */ - QLock chanlock; /* serialise access to chanbusy */ + Lock chanlock; /* serialise access to chanbusy */ QLock split; /* serialise split transactions */ int splitretry; /* count retries of Nyet */ int sofchan; /* bitmap of channels waiting for sof */ @@ -52,7 +65,11 @@ struct Ctlr { }; struct Epio { - QLock; + union { + QLock rlock; + QLock ctllock; + }; + QLock wlock; Block *cb; ulong lastpoll; }; @@ -61,29 +78,48 @@ static Ctlr dwc; static int debug; static char Ebadlen[] = "bad usb request length"; -static char Enotconfig[] = "usb endpoint not configured"; static void clog(Ep *ep, Hostchan *hc); static void logdump(Ep *ep); +static void +filock(Lock *l) +{ + int x; + + x = splfhi(); + ilock(l); + l->sr = x; +} + +static void +fiunlock(Lock *l) +{ + iunlock(l); +} + static Hostchan* chanalloc(Ep *ep) { Ctlr *ctlr; int bitmap, i; + static int first; ctlr = ep->hp->aux; - qlock(&ctlr->chanlock); +retry: + lock(&ctlr->chanlock); bitmap = ctlr->chanbusy; for(i = 0; i < ctlr->nchan; i++) if((bitmap & (1<<i)) == 0){ ctlr->chanbusy = bitmap | 1<<i; - qunlock(&ctlr->chanlock); + unlock(&ctlr->chanlock); return &ctlr->regs->hchan[i]; } - qunlock(&ctlr->chanlock); 
- panic("miller is a lazy git"); - return nil; + unlock(&ctlr->chanlock); + if(!first++) + print("usbdwc: all host channels busy - retrying\n"); + tsleep(&up->sleep, return0, 0, 1); + goto retry; } static void @@ -94,9 +130,9 @@ chanrelease(Ep *ep, Hostchan *chan) ctlr = ep->hp->aux; i = chan - ctlr->regs->hchan; - qlock(&ctlr->chanlock); + lock(&ctlr->chanlock); ctlr->chanbusy &= ~(1<<i); - qunlock(&ctlr->chanlock); + unlock(&ctlr->chanlock); } static void @@ -158,23 +194,22 @@ sofdone(void *a) Dwcregs *r; r = a; - return r->gintsts & Sofintr; + return (r->gintmsk & Sofintr) == 0; } static void sofwait(Ctlr *ctlr, int n) { Dwcregs *r; - int x; r = ctlr->regs; do{ + filock(ctlr); r->gintsts = Sofintr; - x = splfhi(); ctlr->sofchan |= 1<<n; r->gintmsk |= Sofintr; + fiunlock(ctlr); sleep(&ctlr->chanintr[n], sofdone, r); - splx(x); }while((r->hfnum & 7) == 6); } @@ -192,7 +227,7 @@ chandone(void *a) static int chanwait(Ep *ep, Ctlr *ctlr, Hostchan *hc, int mask) { - int intr, n, x, ointr; + int intr, n, ointr; ulong start, now; Dwcregs *r; @@ -200,13 +235,14 @@ chanwait(Ep *ep, Ctlr *ctlr, Hostchan *hc, int mask) n = hc - r->hchan; for(;;){ restart: - x = splfhi(); + filock(ctlr); r->haintmsk |= 1<<n; hc->hcintmsk = mask; - sleep(&ctlr->chanintr[n], chandone, hc); + fiunlock(ctlr); + tsleep(&ctlr->chanintr[n], chandone, hc, 1000); + if((intr = hc->hcint) == 0) + goto restart; hc->hcintmsk = 0; - splx(x); - intr = hc->hcint; if(intr & Chhltd) return intr; start = fastticks(0); @@ -218,13 +254,14 @@ restart: if((ointr != Ack && ointr != (Ack|Xfercomp)) || intr != (Ack|Chhltd|Xfercomp) || (now - start) > 60) - dprint("await %x after %ld %x -> %x\n", + dprint("await %x after %ldµs %x -> %x\n", mask, now - start, ointr, intr); return intr; } if((intr & mask) == 0){ - dprint("ep%d.%d await %x intr %x -> %x\n", - ep->dev->nb, ep->nb, mask, ointr, intr); + if(intr != Nak) + dprint("ep%d.%d await %x after %ldµs intr %x -> %x\n", + ep->dev->nb, ep->nb, mask, now - start, ointr, 
intr); goto restart; } now = fastticks(0); @@ -254,6 +291,8 @@ chanintr(Ctlr *ctlr, int n) int i; hc = &ctlr->regs->hchan[n]; + if((hc->hcint & hc->hcintmsk) == 0) + return 1; if(ctlr->debugchan & (1<<n)) clog(nil, hc); if((hc->hcsplt & Spltena) == 0) @@ -347,7 +386,7 @@ chanio(Ep *ep, Hostchan *hc, int dir, int pid, void *a, int len) else n = len; hc->hctsiz = n | npkt<<OPktcnt | pid; - hc->hcdma = PADDR(a); + hc->hcdma = dmaaddr(a); nleft = len; logstart(ep); @@ -378,13 +417,19 @@ chanio(Ep *ep, Hostchan *hc, int dir, int pid, void *a, int len) } hc->hcchar = (hc->hcchar &~ Chdis) | Chen; clog(ep, hc); +wait: if(ep->ttype == Tbulk && dir == Epin) - i = chanwait(ep, ctlr, hc, /* Ack| */ Chhltd); + i = chanwait(ep, ctlr, hc, Chhltd); else if(ep->ttype == Tintr && (hc->hcsplt & Spltena)) i = chanwait(ep, ctlr, hc, Chhltd); else i = chanwait(ep, ctlr, hc, Chhltd|Nak); clog(ep, hc); + if(hc->hcint != i){ + dprint("chanwait intr %ux->%ux\n", i, hc->hcint); + if((i = hc->hcint) == 0) + goto wait; + } hc->hcint = i; if(hc->hcsplt & Spltena){ @@ -405,12 +450,12 @@ chanio(Ep *ep, Hostchan *hc, int dir, int pid, void *a, int len) continue; } logdump(ep); - print("usbotg: ep%d.%d error intr %8.8ux\n", + print("usbdwc: ep%d.%d error intr %8.8ux\n", ep->dev->nb, ep->nb, i); if(i & ~(Chhltd|Ack)) error(Eio); if(hc->hcdma != hcdma) - print("usbotg: weird hcdma %x->%x intr %x->%x\n", + print("usbdwc: weird hcdma %ux->%ux intr %ux->%ux\n", hcdma, hc->hcdma, i, hc->hcint); } n = hc->hcdma - hcdma; @@ -420,13 +465,13 @@ chanio(Ep *ep, Hostchan *hc, int dir, int pid, void *a, int len) else continue; } - if(dir == Epin && ep->ttype == Tbulk && n == nleft){ + if(dir == Epin && ep->ttype == Tbulk){ nt = (hctsiz & Xfersize) - (hc->hctsiz & Xfersize); if(nt != n){ if(n == ROUND(nt, 4)) n = nt; else - print("usbotg: intr %8.8ux " + print("usbdwc: intr %8.8ux " "dma %8.8ux-%8.8ux " "hctsiz %8.8ux-%8.ux\n", i, hcdma, hc->hcdma, hctsiz, @@ -491,7 +536,7 @@ eptrans(Ep *ep, int rw, void *a, 
long n) nexterror(); } chansetup(hc, ep); - if(rw == Read && ep->ttype == Tbulk) + if(Slowbulkin && rw == Read && ep->ttype == Tbulk) n = multitrans(ep, hc, rw, a, n); else{ n = chanio(ep, hc, rw == Read? Epin : Epout, ep->toggle[rw], @@ -524,8 +569,8 @@ ctltrans(Ep *ep, uchar *req, long n) if(datalen <= 0 || datalen > Maxctllen) error(Ebadlen); /* XXX cache madness */ - epio->cb = b = allocb(ROUND(datalen, ep->maxpkt) + CACHELINESZ); - b->wp = (uchar*)ROUND((uintptr)b->wp, CACHELINESZ); + epio->cb = b = allocb(ROUND(datalen, ep->maxpkt)); + assert(((uintptr)b->wp & (BLOCKALIGN-1)) == 0); memset(b->wp, 0x55, b->lim - b->wp); cachedwbinvse(b->wp, b->lim - b->wp); data = b->wp; @@ -550,6 +595,7 @@ ctltrans(Ep *ep, uchar *req, long n) }else b->wp += chanio(ep, hc, Epin, DATA1, data, datalen); chanio(ep, hc, Epout, DATA1, nil, 0); + cachedinvse(b->rp, BLEN(b)); n = Rsetuplen; }else{ if(datalen > 0) @@ -627,7 +673,7 @@ init(Hci *hp) greset(r, Rxfflsh); r->grstctl = TXF_ALL; greset(r, Txfflsh); - dprint("usbotg: FIFO depth %d sizes rx/nptx/ptx %8.8ux %8.8ux %8.8ux\n", + dprint("usbdwc: FIFO depth %d sizes rx/nptx/ptx %8.8ux %8.8ux %8.8ux\n", n, r->grxfsiz, r->gnptxfsiz, r->hptxfsiz); r->hport0 = Prtpwr|Prtconndet|Prtenchng|Prtovrcurrchng; @@ -654,6 +700,7 @@ fiqintr(Ureg*, void *a) ctlr = hp->aux; r = ctlr->regs; wakechan = 0; + filock(ctlr); intr = r->gintsts; if(intr & Hcintr){ haint = r->haint & r->haintmsk; @@ -679,6 +726,7 @@ fiqintr(Ureg*, void *a) ctlr->wakechan |= wakechan; armtimerset(1); } + fiunlock(ctlr); } static void @@ -686,14 +734,14 @@ irqintr(Ureg*, void *a) { Ctlr *ctlr; uint wakechan; - int i, x; + int i; ctlr = a; - x = splfhi(); + filock(ctlr); armtimerset(0); wakechan = ctlr->wakechan; ctlr->wakechan = 0; - splx(x); + fiunlock(ctlr); for(i = 0; wakechan; i++){ if(wakechan & 1) wakeup(&ctlr->chanintr[i]); @@ -704,11 +752,12 @@ irqintr(Ureg*, void *a) static void epopen(Ep *ep) { - ddprint("usbotg: epopen ep%d.%d ttype %d\n", + ddprint("usbdwc: 
epopen ep%d.%d ttype %d\n", ep->dev->nb, ep->nb, ep->ttype); switch(ep->ttype){ - case Tnone: - error(Enotconfig); + default: + error("endpoint type not supported"); + return; case Tintr: assert(ep->pollival > 0); /* fall through */ @@ -717,6 +766,8 @@ epopen(Ep *ep) ep->toggle[Read] = DATA0; if(ep->toggle[Write] == 0) ep->toggle[Write] = DATA0; + /* fall through */ + case Tctl: break; } ep->aux = malloc(sizeof(Epio)); @@ -727,7 +778,7 @@ epopen(Ep *ep) static void epclose(Ep *ep) { - ddprint("usbotg: epclose ep%d.%d ttype %d\n", + ddprint("usbdwc: epclose ep%d.%d ttype %d\n", ep->dev->nb, ep->nb, ep->ttype); switch(ep->ttype){ case Tctl: @@ -743,6 +794,7 @@ static long epread(Ep *ep, void *a, long n) { Epio *epio; + QLock *q; Block *b; uchar *p; ulong elapsed; @@ -750,10 +802,11 @@ epread(Ep *ep, void *a, long n) ddprint("epread ep%d.%d %ld\n", ep->dev->nb, ep->nb, n); epio = ep->aux; + q = ep->ttype == Tctl? &epio->ctllock : &epio->rlock; b = nil; - qlock(epio); + qlock(q); if(waserror()){ - qunlock(epio); + qunlock(q); if(b) freeb(b); nexterror(); @@ -763,7 +816,7 @@ epread(Ep *ep, void *a, long n) error(Egreg); case Tctl: nr = ctldata(ep, a, n); - qunlock(epio); + qunlock(q); poperror(); return nr; case Tintr: @@ -773,13 +826,15 @@ epread(Ep *ep, void *a, long n) /* fall through */ case Tbulk: /* XXX cache madness */ - b = allocb(ROUND(n, ep->maxpkt) + CACHELINESZ); - p = (uchar*)ROUND((uintptr)b->base, CACHELINESZ); - cachedwbinvse(p, n); + b = allocb(ROUND(n, ep->maxpkt)); + p = b->rp; + assert(((uintptr)p & (BLOCKALIGN-1)) == 0); + cachedinvse(p, n); nr = eptrans(ep, Read, p, n); + cachedinvse(p, nr); epio->lastpoll = TK2MS(m->ticks); memmove(a, p, nr); - qunlock(epio); + qunlock(q); freeb(b); poperror(); return nr; @@ -790,16 +845,18 @@ static long epwrite(Ep *ep, void *a, long n) { Epio *epio; + QLock *q; Block *b; uchar *p; ulong elapsed; ddprint("epwrite ep%d.%d %ld\n", ep->dev->nb, ep->nb, n); epio = ep->aux; + q = ep->ttype == Tctl? 
&epio->ctllock : &epio->wlock; b = nil; - qlock(epio); + qlock(q); if(waserror()){ - qunlock(epio); + qunlock(q); if(b) freeb(b); nexterror(); @@ -815,8 +872,9 @@ epwrite(Ep *ep, void *a, long n) case Tctl: case Tbulk: /* XXX cache madness */ - b = allocb(n + CACHELINESZ); - p = (uchar*)ROUND((uintptr)b->base, CACHELINESZ); + b = allocb(n); + p = b->wp; + assert(((uintptr)p & (BLOCKALIGN-1)) == 0); memmove(p, a, n); cachedwbse(p, n); if(ep->ttype == Tctl) @@ -825,7 +883,7 @@ epwrite(Ep *ep, void *a, long n) n = eptrans(ep, Write, p, n); epio->lastpoll = TK2MS(m->ticks); } - qunlock(epio); + qunlock(q); freeb(b); poperror(); return n; @@ -847,11 +905,11 @@ portenable(Hci *hp, int port, int on) assert(port == 1); ctlr = hp->aux; r = ctlr->regs; - dprint("usbotg enable=%d; sts %#x\n", on, r->hport0); + dprint("usbdwc enable=%d; sts %#x\n", on, r->hport0); if(!on) r->hport0 = Prtpwr | Prtena; tsleep(&up->sleep, return0, 0, Enabledelay); - dprint("usbotg enable=%d; sts %#x\n", on, r->hport0); + dprint("usbdwc enable=%d; sts %#x\n", on, r->hport0); return 0; } @@ -865,7 +923,7 @@ portreset(Hci *hp, int port, int on) assert(port == 1); ctlr = hp->aux; r = ctlr->regs; - dprint("usbotg reset=%d; sts %#x\n", on, r->hport0); + dprint("usbdwc reset=%d; sts %#x\n", on, r->hport0); if(!on) return 0; r->hport0 = Prtpwr | Prtrst; @@ -876,9 +934,9 @@ portreset(Hci *hp, int port, int on) b = s & (Prtconndet|Prtenchng|Prtovrcurrchng); if(b != 0) r->hport0 = Prtpwr | b; - dprint("usbotg reset=%d; sts %#x\n", on, s); + dprint("usbdwc reset=%d; sts %#x\n", on, s); if((s & Prtena) == 0) - print("usbotg: host port not enabled after reset"); + print("usbdwc: host port not enabled after reset"); return 0; } @@ -948,7 +1006,7 @@ reset(Hci *hp) id = ctlr->regs->gsnpsid; if((id>>16) != ('O'<<8 | 'T')) return -1; - dprint("usbotg: rev %d.%3.3x\n", (id>>12)&0xF, id&0xFFF); + dprint("usbdwc: rev %d.%3.3x\n", (id>>12)&0xF, id&0xFFF); intrenable(IRQtimerArm, irqintr, ctlr, 0, "dwc"); diff --git 
a/sys/src/9/bcm/vcore.c b/sys/src/9/bcm/vcore.c index 2b82db238..f939787b4 100644 --- a/sys/src/9/bcm/vcore.c +++ b/sys/src/9/bcm/vcore.c @@ -12,6 +12,7 @@ typedef struct Prophdr Prophdr; typedef struct Fbinfo Fbinfo; +typedef struct Vgpio Vgpio; enum { Read = 0x00>>2, @@ -33,13 +34,16 @@ enum { TagResp = 1<<31, TagGetfwrev = 0x00000001, - TagGetbrdrev = 0x00010002, + TagGetrev = 0x00010002, TagGetmac = 0x00010003, TagGetram = 0x00010005, TagGetpower = 0x00020001, TagSetpower = 0x00028001, Powerwait = 1<<1, TagGetclkspd= 0x00030002, + TagGetclkmax= 0x00030004, + TagSetclkspd= 0x00038002, + TagGettemp = 0x00030006, TagFballoc = 0x00040001, TagFbfree = 0x00048001, TagFbblank = 0x00040002, @@ -49,8 +53,11 @@ enum { TagSetvres = 0x00048004, TagGetdepth = 0x00040005, TagSetdepth = 0x00048005, - TagGetrgb = 0x00044006, + TagGetrgb = 0x00040006, TagSetrgb = 0x00048006, + TagGetGpio = 0x00040010, + + Nvgpio = 2, }; struct Fbinfo { @@ -76,6 +83,15 @@ struct Prophdr { u32int data[1]; }; +struct Vgpio { + u32int *counts; + u16int incs; + u16int decs; + int ison; +}; + +static Vgpio vgpio; + static void vcwrite(uint chan, int val) { @@ -115,7 +131,8 @@ vcreq(int tag, void *buf, int vallen, int rsplen) uintptr r; int n; Prophdr *prop; - static uintptr base = BUSDRAM; + uintptr aprop; + static int busaddr = 1; if(rsplen < vallen) rsplen = vallen; @@ -132,15 +149,18 @@ vcreq(int tag, void *buf, int vallen, int rsplen) memmove(prop->data, buf, vallen); cachedwbinvse(prop, prop->len); for(;;){ - vcwrite(ChanProps, PADDR(prop) + base); + aprop = busaddr? 
dmaaddr(prop) : PTR2UINT(prop); + vcwrite(ChanProps, aprop); r = vcread(ChanProps); - if(r == PADDR(prop) + base) + if(r == aprop) break; - if(base == 0) + if(!busaddr) return -1; - base = 0; + busaddr = 0; } - if(prop->req == RspOk && prop->tag == tag && prop->taglen & TagResp) { + if(prop->req == RspOk && + prop->tag == tag && + (prop->taglen&TagResp)) { if((n = prop->taglen & ~TagResp) < rsplen) rsplen = n; memmove(buf, prop->data, rsplen); @@ -158,13 +178,17 @@ static int fbdefault(int *width, int *height, int *depth) { u32int buf[3]; + char *p; if(vcreq(TagGetres, &buf[0], 0, 2*4) != 2*4 || vcreq(TagGetdepth, &buf[2], 0, 4) != 4) return -1; *width = buf[0]; *height = buf[1]; - *depth = buf[2]; + if((p = getconf("bcm2708_fb.fbdepth")) != nil) + *depth = atoi(p); + else + *depth = buf[2]; return 0; } @@ -184,7 +208,7 @@ fbinit(int set, int *width, int *height, int *depth) fi->yres = fi->yresvirtual = *height; fi->bpp = *depth; cachedwbinvse(fi, sizeof(*fi)); - vcwrite(ChanFb, DMAADDR(fi)); + vcwrite(ChanFb, dmaaddr(fi)); if(vcread(ChanFb) != 0) return 0; va = mmukmap(FRAMEBUFFER, PADDR(fi->base), fi->screensize); @@ -213,7 +237,7 @@ setpower(int dev, int on) u32int buf[2]; buf[0] = dev; - buf[1] = Powerwait | (on? 1: 0); + buf[1] = Powerwait | (on? 
1 : 0); vcreq(TagSetpower, buf, sizeof buf, sizeof buf); } @@ -250,23 +274,27 @@ getethermac(void) } /* - * Get firmware revision + * Get board revision */ uint -getfirmware(void) +getboardrev(void) { u32int buf[1]; - if(vcreq(TagGetfwrev, buf, 0, sizeof buf) != sizeof buf) + if(vcreq(TagGetrev, buf, 0, sizeof buf) != sizeof buf) return 0; return buf[0]; } +/* + * Get firmware revision + */ uint -getrevision(void) +getfirmware(void) { u32int buf[1]; - if(vcreq(TagGetbrdrev, buf, 0, sizeof buf) != sizeof buf) + + if(vcreq(TagGetfwrev, buf, 0, sizeof buf) != sizeof buf) return 0; return buf[0]; } @@ -299,13 +327,63 @@ getclkrate(int clkid) return buf[1]; } +/* + * Set clock rate to hz (or max speed if hz == 0) + */ +void +setclkrate(int clkid, ulong hz) +{ + u32int buf[2]; + + buf[0] = clkid; + if(hz != 0) + buf[1] = hz; + else if(vcreq(TagGetclkmax, buf, sizeof(buf[0]), sizeof(buf)) != sizeof buf) + return; + vcreq(TagSetclkspd, buf, sizeof(buf), sizeof(buf)); +} + +/* + * Get cpu temperature + */ uint -gettemp(int tempid) +getcputemp(void) { u32int buf[2]; - buf[0] = tempid; - if(vcreq(0x00030006, buf, sizeof(buf[0]), sizeof(buf)) != sizeof buf) - return 0; + buf[0] = 0; + if(vcreq(TagGettemp, buf, sizeof(buf[0]), sizeof buf) != sizeof buf) + return 0; return buf[1]; } + +/* + * Virtual GPIO - used for ACT LED on pi3 + */ +void +vgpinit(void) +{ + u32int buf[1]; + uintptr va; + + buf[0] = 0; + if(vcreq(TagGetGpio, buf, 0, sizeof(buf)) != sizeof buf || buf[0] == 0) + return; + va = mmukmap(VGPIO, buf[0] & ~0xC0000000, BY2PG); + if(va == 0) + return; + vgpio.counts = (u32int*)va; +} + +void +vgpset(uint port, int on) +{ + if(vgpio.counts == nil || port >= Nvgpio || on == vgpio.ison) + return; + if(on) + vgpio.incs++; + else + vgpio.decs++; + vgpio.counts[port] = (vgpio.incs << 16) | vgpio.decs; + vgpio.ison = on; +} diff --git a/sys/src/9/bcm/vfp3.c b/sys/src/9/bcm/vfp3.c index b1c381274..c62af6b7b 100644 --- a/sys/src/9/bcm/vfp3.c +++ b/sys/src/9/bcm/vfp3.c @@ 
-163,7 +163,10 @@ fpcfg(void) static int printed; /* clear pending exceptions; no traps in vfp3; all v7 ops are scalar */ - m->fpscr = Dn | Fz | FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps; + m->fpscr = Dn | FPRNR | (FPINVAL | FPZDIV | FPOVFL) & ~Alltraps; + /* VFPv2 needs software support for underflows, so force them to zero */ + if(m->havefp == VFPv2) + m->fpscr |= Fz; fpwr(Fpscr, m->fpscr); m->fpconfiged = 1; @@ -278,7 +281,7 @@ fpuprocsave(Proc *p) { if(p->fpstate == FPactive){ if(p->state == Moribund) - fpclear(); + fpoff(); else{ /* * Fpsave() stores without handling pending @@ -371,8 +374,6 @@ mathnote(void) static void mathemu(Ureg *) { - if(m->havefp == VFPv3 && !(fprd(Fpexc) & (Fpex|Fpdex))) - iprint("mathemu: not an FP exception but an unknown FP opcode\n"); switch(up->fpstate){ case FPemu: error("illegal instruction: VFP opcode in emulated mode"); @@ -472,6 +473,7 @@ fpuemu(Ureg* ureg) { int s, nfp, cop, op; uintptr pc; + static int already; if(waserror()){ postnote(up, 1, up->errstr, NDebug); @@ -484,16 +486,14 @@ fpuemu(Ureg* ureg) nfp = 0; pc = ureg->pc; validaddr(pc, 4, 0); - if(!condok(ureg->psr, *(ulong*)pc >> 28)) - iprint("fpuemu: conditional instr shouldn't have got here\n"); op = (*(ulong *)pc >> 24) & MASK(4); cop = (*(ulong *)pc >> 8) & MASK(4); if(m->fpon) fpstuck(pc); /* debugging; could move down 1 line */ if (ISFPAOP(cop, op)) { /* old arm 7500 fpa opcode? */ -// iprint("fpuemu: fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc); -// error("illegal instruction: old arm 7500 fpa opcode"); s = spllo(); + if(!already++) + pprint("warning: emulated arm7500 fpa instr %#8.8lux at %#p\n", *(ulong *)pc, pc); if(waserror()){ splx(s); nexterror(); @@ -503,7 +503,7 @@ fpuemu(Ureg* ureg) m->fppc = m->fpcnt = 0; splx(s); poperror(); - } else if (ISVFPOP(cop, op)) { /* if vfp, fpu must be off */ + } else if (ISVFPOP(cop, op)) { /* if vfp, fpu off or unsupported instruction */ mathemu(ureg); /* enable fpu & retry */ nfp = 1; } |