diff options
641 files changed, 18626 insertions, 13153 deletions
@@ -369,10 +369,10 @@ P: 1024/8462A731 4C 55 86 34 44 59 A7 99 2B 97 88 4A 88 9A 0D 97 D: sun4 port, Sparc hacker N: Hugh Blemings -E: hugh@misc.nu -W: http://misc.nu/hugh/ -D: Author and maintainer of the Keyspan USB to Serial drivers -S: Po Box 234 +E: hugh@blemings.org +W: http://blemings.org/hugh +D: Original author of the Keyspan USB to serial drivers, random PowerPC hacker +S: PO Box 234 S: Belconnen ACT 2616 S: Australia diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc index a0d18dbeb7a9..6a5fd072849d 100644 --- a/Documentation/ABI/testing/sysfs-class-uwb_rc +++ b/Documentation/ABI/testing/sysfs-class-uwb_rc @@ -32,14 +32,16 @@ Contact: linux-usb@vger.kernel.org Description: Write: - <channel> [<bpst offset>] + <channel> - to start beaconing on a specific channel, or stop - beaconing if <channel> is -1. Valid channels depends - on the radio controller's supported band groups. + to force a specific channel to be used when beaconing, + or, if <channel> is -1, to prohibit beaconing. If + <channel> is 0, then the default channel selection + algorithm will be used. Valid channels depends on the + radio controller's supported band groups. - <bpst offset> may be used to try and join a specific - beacon group if more than one was found during a scan. + Reading returns the currently active channel, or -1 if + the radio controller is not beaconing. What: /sys/class/uwb_rc/uwbN/scan Date: July 2008 diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index 94bbc27ddd4f..9d620c153b04 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -50,16 +50,17 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets cpu_possible_map = cpu_present_map + additional_cpus (*) Option valid only for following architectures -- x86_64, ia64 +- ia64 -ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT -to determine the number of potentially hot-pluggable cpus. The implementation -should only rely on this to count the # of cpus, but *MUST* not rely on the -apicid values in those tables for disabled apics. In the event BIOS doesn't -mark such hot-pluggable cpus as disabled entries, one could use this -parameter "additional_cpus=x" to represent those cpus in the cpu_possible_map. +ia64 uses the number of disabled local apics in ACPI tables MADT to +determine the number of potentially hot-pluggable cpus. The implementation +should only rely on this to count the # of cpus, but *MUST* not rely +on the apicid values in those tables for disabled apics. In the event +BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could +use this parameter "additional_cpus=x" to represent those cpus in the +cpu_possible_map. -possible_cpus=n [s390 only] use this to set hotpluggable cpus. +possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus. This option sets possible_cpus bits in cpu_possible_map. Thus keeping the numbers of bits set constant even if the machine gets rebooted. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index dc7c681e532c..df18d87c4837 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -310,15 +310,6 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl> --------------------------- -What: ide-scsi (BLK_DEV_IDESCSI) -When: 2.6.29 -Why: The 2.6 kernel supports direct writing to ide CD drives, which - eliminates the need for ide-scsi. The new method is more - efficient in every way. -Who: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> - ---------------------------- - What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client() When: 2.6.29 (ideally) or 2.6.30 (more likely) Why: Deprecated by the new (standard) device driver binding model. Use diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 23d2f4460deb..ccec55394380 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -394,7 +394,6 @@ prototypes: unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *, unsigned long); }; locking rules: @@ -424,7 +423,6 @@ sendfile: no sendpage: no get_unmapped_area: no check_flags: no -dir_notify: no ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt new file mode 100644 index 000000000000..68dffd87f9b7 --- /dev/null +++ b/Documentation/filesystems/devpts.txt @@ -0,0 +1,132 @@ + +To support containers, we now allow multiple instances of devpts filesystem, +such that indices of ptys allocated in one instance are independent of indices +allocated in other instances of devpts. + +To preserve backward compatibility, this support for multiple instances is +enabled only if: + + - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and + - '-o newinstance' mount option is specified while mounting devpts + +IOW, devpts now supports both single-instance and multi-instance semantics. + +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and +this referred to as the "legacy" mode. In this mode, the new mount options +(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message +on console. + +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the +'newinstance' option (as in current start-up scripts) the new mount binds +to the initial kernel mount of devpts. This mode is referred to as the +'single-instance' mode and the current, single-instance semantics are +preserved, i.e PTYs are common across the system. + +The only difference between this single-instance mode and the legacy mode +is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which +can safely be ignored. + +If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified, +the mount is considered to be in the multi-instance mode and a new instance +of the devpts fs is created. Any ptys created in this instance are independent +of ptys in other instances of devpts. Like in the single-instance mode, the +/dev/pts/ptmx node is present. To effectively use the multi-instance mode, +open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or +bind-mount. + +Eg: A container startup script could do the following: + + $ chmod 0666 /dev/pts/ptmx + $ rm /dev/ptmx + $ ln -s pts/ptmx /dev/ptmx + $ ns_exec -cm /bin/bash + + # We are now in new container + + $ umount /dev/pts + $ mount -t devpts -o newinstance lxcpts /dev/pts + $ sshd -p 1234 + +where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs +/bin/bash in the child process. A pty created by the sshd is not visible in +the original mount of /dev/pts. + +User-space changes +------------------ + +In multi-instance mode (i.e '-o newinstance' mount option is specified at least +once), following user-space issues should be noted. + +1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored + and no change is needed to system-startup scripts. + +2. To effectively use multi-instance mode (i.e -o newinstance is specified) + administrators or startup scripts should "redirect" open of /dev/ptmx to + /dev/pts/ptmx using either a bind mount or symlink. + + $ mount -t devpts -o newinstance devpts /dev/pts + + followed by either + + $ rm /dev/ptmx + $ ln -s pts/ptmx /dev/ptmx + $ chmod 666 /dev/pts/ptmx + or + $ mount -o bind /dev/pts/ptmx /dev/ptmx + +3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it + enables better error-reporting and treats both single-instance and + multi-instance mounts similarly. + + But this method requires that system-startup scripts set the mode of + /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the + mode by, either + + - adding ptmxmode mount option to devpts entry in /etc/fstab, or + - using 'chmod 0666 /dev/pts/ptmx' + +4. If multi-instance mode mount is needed for containers, but the system + startup scripts have not yet been updated, container-startup scripts + should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single- + instance mounts. + + Or, in general, container-startup scripts should use: + + mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts + if [ ! -L /dev/ptmx ]; then + mount -o bind /dev/pts/ptmx /dev/ptmx + fi + + When all devpts mounts are multi-instance, /dev/ptmx can permanently be + a symlink to pts/ptmx and the bind mount can be ignored. + +5. A multi-instance mount that is not accompanied by the /dev/ptmx to + /dev/pts/ptmx redirection would result in an unusable/unreachable pty. + + mount -t devpts -o newinstance lxcpts /dev/pts + + immediately followed by: + + open("/dev/ptmx") + + would create a pty, say /dev/pts/7, in the initial kernel mount. + But /dev/pts/7 would be invisible in the new mount. + +6. The permissions for /dev/pts/ptmx node should be specified when mounting + /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000). + + mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts + + The permissions can be later be changed as usual with 'chmod'. + + chmod 666 /dev/pts/ptmx + +7. A mount of devpts without the 'newinstance' option results in binding to + initial kernel mount. This behavior while preserving legacy semantics, + does not provide strict isolation in a container environment. i.e by + mounting devpts without the 'newinstance' option, a container could + get visibility into the 'host' or root container's devpts. + + To workaround this and have strict isolation, all mounts of devpts, + including the mount in the root container, should use the newinstance + option. diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt index bb0142f61084..ac2facc50d2a 100644 --- a/Documentation/filesystems/files.txt +++ b/Documentation/filesystems/files.txt @@ -76,13 +76,13 @@ the fdtable structure - 5. Handling of the file structures is special. Since the look-up of the fd (fget()/fget_light()) are lock-free, it is possible that look-up may race with the last put() operation on the - file structure. This is avoided using atomic_inc_not_zero() + file structure. This is avoided using atomic_long_inc_not_zero() on ->f_count : rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (atomic_inc_not_zero(&file->f_count)) + if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -92,7 +92,7 @@ the fdtable structure - .... return file; - atomic_inc_not_zero() detects if refcounts is already zero or + atomic_long_inc_not_zero() detects if refcounts is already zero or goes to zero during increment. If it does, we fail fget()/fget_light(). diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index dd84ea3c10da..84da2a4ba25a 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -95,6 +95,9 @@ no_chk_data_crc skip checking of CRCs on data nodes in order to of this option is that corruption of the contents of a file can go unnoticed. chk_data_crc (*) do not skip checking CRCs on data nodes +compr=none override default compressor and set it to "none" +compr=lzo override default compressor and set it to "lzo" +compr=zlib override default compressor and set it to "zlib" Quick usage instructions diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 5579bda58a6d..ef19afa186a9 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -733,7 +733,6 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int); @@ -800,8 +799,6 @@ otherwise noted. check_flags: called by the fcntl(2) system call for F_SETFL command - dir_notify: called by the fcntl(2) system call for F_NOTIFY command - flock: called by the flock(2) system call splice_write: called by the VFS to splice data from a pipe to a file. This @@ -931,7 +928,7 @@ manipulate dentries: d_lookup: look up a dentry given its parent and path name component It looks up the child of that given name from the dcache hash table. If it is found, the reference count is incremented - and the dentry is returned. The caller must use d_put() + and the dentry is returned. The caller must use dput() to free the dentry when it finishes using it. For further information on dentry locking, please refer to the document diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index b880ce5dbd33..824699174436 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -97,6 +97,7 @@ Code Seq# Include File Comments <http://linux01.gwdg.de/~alatham/ppdd.html> 'M' all linux/soundcard.h 'N' 00-1F drivers/usb/scanner.h +'O' 00-02 include/mtd/ubi-user.h UBI 'P' all linux/soundcard.h 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h @@ -142,6 +143,9 @@ Code Seq# Include File Comments 'n' 00-7F linux/ncp_fs.h 'n' E0-FF video/matrox.h matroxfb 'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 +'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) +'o' 40-41 include/mtd/ubi-user.h UBI +'o' 01-A1 include/linux/dvb/*.h DVB 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) 'p' 00-3F linux/mc146818rtc.h conflict! 'p' 40-7F linux/nvram.h diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX index 114644285454..e8d2b6d83a3d 100644 --- a/Documentation/kbuild/00-INDEX +++ b/Documentation/kbuild/00-INDEX @@ -1,5 +1,9 @@ 00-INDEX - - this file: info on the kernel build process + - this file: info on the kernel build process +kbuild.txt + - developer information on kbuild +kconfig.txt + - usage help for make *config kconfig-language.txt - specification of Config Language, the language in Kconfig files makefiles.txt diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt new file mode 100644 index 000000000000..51771847e816 --- /dev/null +++ b/Documentation/kbuild/kbuild.txt @@ -0,0 +1,126 @@ +Environment variables + +KCPPFLAGS +-------------------------------------------------- +Additional options to pass when preprocessing. The preprocessing options +will be used in all cases where kbuild do preprocessing including +building C files and assembler files. + +KAFLAGS +-------------------------------------------------- +Additional options to the assembler. + +KCFLAGS +-------------------------------------------------- +Additional options to the C compiler. + +KBUILD_VERBOSE +-------------------------------------------------- +Set the kbuild verbosity. Can be assinged same values as "V=...". +See make help for the full list. +Setting "V=..." takes precedence over KBUILD_VERBOSE. + +KBUILD_EXTMOD +-------------------------------------------------- +Set the directory to look for the kernel source when building external +modules. +The directory can be specified in several ways: +1) Use "M=..." on the command line +2) Environmnet variable KBUILD_EXTMOD +3) Environmnet variable SUBDIRS +The possibilities are listed in the order they take precedence. +Using "M=..." will always override the others. + +KBUILD_OUTPUT +-------------------------------------------------- +Specify the output directory when building the kernel. +The output directory can also be specificed using "O=...". +Setting "O=..." takes precedence over KBUILD_OUTPUT + +ARCH +-------------------------------------------------- +Set ARCH to the architecture to be built. +In most cases the name of the architecture is the same as the +directory name found in the arch/ directory. +But some architectures suach as x86 and sparc has aliases. +x86: i386 for 32 bit, x86_64 for 64 bit +sparc: sparc for 32 bit, sparc64 for 64 bit + +CROSS_COMPILE +-------------------------------------------------- +Specify an optional fixed part of the binutils filename. +CROSS_COMPILE can be a part of the filename or the full path. + +CROSS_COMPILE is also used for ccache is some setups. + +CF +-------------------------------------------------- +Additional options for sparse. +CF is often used on the command-line like this: + + make CF=-Wbitwise C=2 + +INSTALL_PATH +-------------------------------------------------- +INSTALL_PATH specifies where to place the updated kernel and system map +images. Default is /boot, but you can set it to other values + + +MODLIB +-------------------------------------------------- +Specify where to install modules. +The default value is: + + $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) + +The value can be overridden in which case the default value is ignored. + +INSTALL_MOD_PATH +-------------------------------------------------- +INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +relocations required by build roots. This is not defined in the +makefile but the argument can be passed to make if needed. + +INSTALL_MOD_STRIP +-------------------------------------------------- +INSTALL_MOD_STRIP, if defined, will cause modules to be +stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +the default option --strip-debug will be used. Otherwise, +INSTALL_MOD_STRIP will used as the options to the strip command. + +INSTALL_FW_PATH +-------------------------------------------------- +INSTALL_FW_PATH specify where to install the firmware blobs. +The default value is: + + $(INSTALL_MOD_PATH)/lib/firmware + +The value can be overridden in which case the default value is ignored. + +INSTALL_HDR_PATH +-------------------------------------------------- +INSTALL_HDR_PATH specify where to install user space headers when +executing "make headers_*". +The default value is: + + $(objtree)/usr + +$(objtree) is the directory where output files are saved. +The output directory is often set using "O=..." on the commandline. + +The value can be overridden in which case the default value is ignored. + +KBUILD_MODPOST_WARN +-------------------------------------------------- +KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined +symbols in the final module linking stage. + +KBUILD_MODPOST_FINAL +-------------------------------------------------- +KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. +This is solely usefull to speed up test compiles. + +KBUILD_EXTRA_SYMBOLS +-------------------------------------------------- +For modules use symbols from another modules. +See more details in modules.txt. diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt new file mode 100644 index 000000000000..26a7c0a93193 --- /dev/null +++ b/Documentation/kbuild/kconfig.txt @@ -0,0 +1,188 @@ +This file contains some assistance for using "make *config". + +Use "make help" to list all of the possible configuration targets. + +The xconfig ('qconf') and menuconfig ('mconf') programs also +have embedded help text. Be sure to check it for navigation, +search, and other general help text. + +====================================================================== +General +-------------------------------------------------- + +New kernel releases often introduce new config symbols. Often more +important, new kernel releases may rename config symbols. When +this happens, using a previously working .config file and running +"make oldconfig" won't necessarily produce a working new kernel +for you, so you may find that you need to see what NEW kernel +symbols have been introduced. + +To see a list of new config symbols when using "make oldconfig", use + + cp user/some/old.config .config + yes "" | make oldconfig >conf.new + +and the config program will list as (NEW) any new symbols that have +unknown values. Of course, the .config file is also updated with +new (default) values, so you can use: + + grep "(NEW)" conf.new + +to see the new config symbols or you can 'diff' the previous and +new .config files to see the differences: + + diff .config.old .config | less + +(Yes, we need something better here.) + + +====================================================================== +menuconfig +-------------------------------------------------- + +SEARCHING for CONFIG symbols + +Searching in menuconfig: + + The Search function searches for kernel configuration symbol + names, so you have to know something close to what you are + looking for. + + Example: + /hotplug + This lists all config symbols that contain "hotplug", + e.g., HOTPLUG, HOTPLUG_CPU, MEMORY_HOTPLUG. + + For search help, enter / followed TAB-TAB-TAB (to highlight + <Help>) and Enter. This will tell you that you can also use + regular expressions (regexes) in the search string, so if you + are not interested in MEMORY_HOTPLUG, you could try + + /^hotplug + + +______________________________________________________________________ +Color Themes for 'menuconfig' + +It is possible to select different color themes using the variable +MENUCONFIG_COLOR. To select a theme use: + + make MENUCONFIG_COLOR=<theme> menuconfig + +Available themes are: + mono => selects colors suitable for monochrome displays + blackbg => selects a color scheme with black background + classic => theme with blue background. The classic look + bluetitle => a LCD friendly version of classic. (default) + +______________________________________________________________________ +Environment variables in 'menuconfig' + +KCONFIG_ALLCONFIG +-------------------------------------------------- +(partially based on lkml email from/by Rob Landley, re: miniconfig) +-------------------------------------------------- +The allyesconfig/allmodconfig/allnoconfig/randconfig variants can +also use the environment variable KCONFIG_ALLCONFIG as a flag or a +filename that contains config symbols that the user requires to be +set to a specific value. If KCONFIG_ALLCONFIG is used without a +filename, "make *config" checks for a file named +"all{yes/mod/no/random}.config" (corresponding to the *config command +that was used) for symbol values that are to be forced. If this file +is not found, it checks for a file named "all.config" to contain forced +values. + +This enables you to create "miniature" config (miniconfig) or custom +config files containing just the config symbols that you are interested +in. Then the kernel config system generates the full .config file, +including dependencies of your miniconfig file, based on the miniconfig +file. + +This 'KCONFIG_ALLCONFIG' file is a config file which contains +(usually a subset of all) preset config symbols. These variable +settings are still subject to normal dependency checks. + +Examples: + KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig +or + KCONFIG_ALLCONFIG=mini.config make allnoconfig +or + make KCONFIG_ALLCONFIG=mini.config allnoconfig + +These examples will disable most options (allnoconfig) but enable or +disable the options that are explicitly listed in the specified +mini-config files. + +KCONFIG_NOSILENTUPDATE +-------------------------------------------------- +If this variable has a non-blank value, it prevents silent kernel +config udpates (requires explicit updates). + +KCONFIG_CONFIG +-------------------------------------------------- +This environment variable can be used to specify a default kernel config +file name to override the default name of ".config". + +KCONFIG_OVERWRITECONFIG +-------------------------------------------------- +If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not +break symlinks when .config is a symlink to somewhere else. + +KCONFIG_NOTIMESTAMP +-------------------------------------------------- +If this environment variable exists and is non-null, the timestamp line +in generated .config files is omitted. + +KCONFIG_AUTOCONFIG +-------------------------------------------------- +This environment variable can be set to specify the path & name of the +"auto.conf" file. Its default value is "include/config/auto.conf". + +KCONFIG_AUTOHEADER +-------------------------------------------------- +This environment variable can be set to specify the path & name of the +"autoconf.h" (header) file. Its default value is "include/linux/autoconf.h". + +______________________________________________________________________ +menuconfig User Interface Options +---------------------------------------------------------------------- +MENUCONFIG_MODE +-------------------------------------------------- +This mode shows all sub-menus in one large tree. + +Example: + MENUCONFIG_MODE=single_menu make menuconfig + +====================================================================== +xconfig +-------------------------------------------------- + +Searching in xconfig: + + The Search function searches for kernel configuration symbol + names, so you have to know something close to what you are + looking for. + + Example: + Ctrl-F hotplug + or + Menu: File, Search, hotplug + + lists all config symbol entries that contain "hotplug" in + the symbol name. In this Search dialog, you may change the + config setting for any of the entries that are not grayed out. + You can also enter a different search string without having + to return to the main menu. + + +====================================================================== +gconfig +-------------------------------------------------- + +Searching in gconfig: + + None (gconfig isn't maintained as well as xconfig or menuconfig); + however, gconfig does have a few more viewing choices than + xconfig does. + +### diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf index 2e78b70f3adc..426ddaaef96f 100644 --- a/Documentation/usb/wusb-cbaf +++ b/Documentation/usb/wusb-cbaf @@ -80,12 +80,6 @@ case $1 in start) for dev in ${2:-$hdevs} do - uwb_rc=$(readlink -f $dev/uwb_rc) - if cat $uwb_rc/beacon | grep -q -- "-1" - then - echo 13 0 > $uwb_rc/beacon - echo I: started beaconing on ch 13 on $(basename $uwb_rc) >&2 - fi echo $host_CHID > $dev/wusb_chid echo I: started host $(basename $dev) >&2 done @@ -95,9 +89,6 @@ case $1 in do echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid echo I: stopped host $(basename $dev) >&2 - uwb_rc=$(readlink -f $dev/uwb_rc) - echo -1 | cat > $uwb_rc/beacon - echo I: stopped beaconing on $(basename $uwb_rc) >&2 done ;; set-chid) diff --git a/MAINTAINERS b/MAINTAINERS index ceb32ee51f9d..befacf07729f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2049,6 +2049,12 @@ M: mikulas@artax.karlin.mff.cuni.cz W: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi S: Maintained +HSO 3G Modem Driver (hso.c) +P: Denis Joseph Barrow +M: d.barow@option.com +W: http://www.pharscape.org +S: Maintained + HTCPEN TOUCHSCREEN DRIVER P: Pau Oliva Fora M: pof@eslack.org @@ -2146,11 +2152,6 @@ M: Gadi Oxman <gadio@netvision.net.il> L: linux-kernel@vger.kernel.org S: Maintained -IDE-SCSI DRIVER -L: linux-ide@vger.kernel.org -L: linux-scsi@vger.kernel.org -S: Orphan - IDLE-I7300 P: Andy Henroid M: andrew.d.henroid@intel.com @@ -2541,8 +2542,6 @@ W: http://kvm.qumranet.com S: Supported KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64) -P: Anthony Xu -M: anthony.xu@intel.com P: Xiantao Zhang M: xiantao.zhang@intel.com L: kvm-ia64@vger.kernel.org @@ -2635,13 +2634,13 @@ W: http://www.hansenpartnership.com/voyager S: Maintained LINUX FOR POWERPC (32-BIT AND 64-BIT) -P: Paul Mackerras -M: paulus@samba.org P: Benjamin Herrenschmidt M: benh@kernel.crashing.org +P: Paul Mackerras +M: paulus@samba.org W: http://www.penguinppc.org/ L: linuxppc-dev@ozlabs.org -T: git kernel.org:/pub/scm/linux/kernel/git/paulus/powerpc.git +T: git kernel.org:/pub/scm/linux/kernel/git/benh/powerpc.git S: Supported LINUX FOR POWER MACINTOSH @@ -321,7 +321,8 @@ KALLSYMS = scripts/kallsyms PERL = perl CHECK = sparse -CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF) +CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ + -Wbitwise -Wno-return-void $(CF) MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) @@ -52,11 +52,11 @@ DOCUMENTATION: - The Documentation/DocBook/ subdirectory contains several guides for kernel developers and users. These guides can be rendered in a - number of formats: PostScript (.ps), PDF, and HTML, among others. - After installation, "make psdocs", "make pdfdocs", or "make htmldocs" - will render the documentation in the requested format. + number of formats: PostScript (.ps), PDF, HTML, & man-pages, among others. + After installation, "make psdocs", "make pdfdocs", "make htmldocs", + or "make mandocs" will render the documentation in the requested format. -INSTALLING the kernel: +INSTALLING the kernel source: - If you install the full sources, put the kernel tarball in a directory where you have permissions (eg. your home directory) and @@ -187,14 +187,9 @@ CONFIGURING the kernel: "make randconfig" Create a ./.config file by setting symbol values to random values. - The allyesconfig/allmodconfig/allnoconfig/randconfig variants can - also use the environment variable KCONFIG_ALLCONFIG to specify a - filename that contains config options that the user requires to be - set to a specific value. If KCONFIG_ALLCONFIG=filename is not used, - "make *config" checks for a file named "all{yes/mod/no/random}.config" - for symbol values that are to be forced. If this file is not found, - it checks for a file named "all.config" to contain forced values. - + You can find more information on using the Linux kernel config tools + in Documentation/kbuild/make-configs.txt. + NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can under some circumstances lead to problems: probing for a @@ -231,6 +226,19 @@ COMPILING the kernel: - If you configured any of the parts of the kernel as `modules', you will also have to do "make modules_install". + - Verbose kernel compile/build output: + + Normally the kernel build system runs in a fairly quiet mode (but not + totally silent). However, sometimes you or other kernel developers need + to see compile, link, or other commands exactly as they are executed. + For this, use "verbose" build mode. This is done by inserting + "V=1" in the "make" command. E.g.: + + make V=1 all + + To have the build system also tell the reason for the rebuild of each + target, use "V=2". The default is "V=0". + - Keep a backup kernel handy in case something goes wrong. This is especially true for the development releases, since each new release contains new code which has not been debugged. Make sure you keep a diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h index 544c69af8168..547e90951cec 100644 --- a/arch/alpha/include/asm/smp.h +++ b/arch/alpha/include/asm/smp.h @@ -45,7 +45,6 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS]; #define raw_smp_processor_id() (current_thread_info()->cpu) extern int smp_num_cpus; -#define cpu_possible_map cpu_present_map extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi(cpumask_t mask); diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index ac706c1d7ada..b4697759a123 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o + alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c new file mode 100644 index 000000000000..4a0af906b00a --- /dev/null +++ b/arch/alpha/kernel/binfmt_loader.c @@ -0,0 +1,51 @@ +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mm_types.h> +#include <linux/binfmts.h> +#include <linux/a.out.h> + +static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct exec *eh = (struct exec *)bprm->buf; + unsigned long loader; + struct file *file; + int retval; + + if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) + return -ENOEXEC; + + if (bprm->loader) + return -ENOEXEC; + + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; + + loader = bprm->vma->vm_end - sizeof(void *); + + file = open_exec("/sbin/loader"); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + /* Remember if the application is TASO. */ + bprm->taso = eh->ah.entry < 0x100000000UL; + + bprm->file = file; + bprm->loader = loader; + retval = prepare_binprm(bprm); + if (retval < 0) + return retval; + return search_binary_handler(bprm,regs); +} + +static struct linux_binfmt loader_format = { + .load_binary = load_binary, +}; + +static int __init init_loader_binfmt(void) +{ + return register_binfmt(&loader_format); +} +arch_initcall(init_loader_binfmt); diff --git a/arch/alpha/kernel/init_task.c b/arch/alpha/kernel/init_task.c index 1f762189fa64..c2938e574a40 100644 --- a/arch/alpha/kernel/init_task.c +++ b/arch/alpha/kernel/init_task.c @@ -8,7 +8,6 @@ #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index c626a821cdcb..d0f1620007f7 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq) last_cpu = cpu; irq_desc[irq].affinity = cpumask_of_cpu(cpu); - irq_desc[irq].chip->set_affinity(irq, cpumask_of_cpu(cpu)); + irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 351407e07e71..f238370c907d 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -94,6 +94,7 @@ common_shutdown_1(void *generic_ptr) flags |= 0x00040000UL; /* "remain halted" */ *pflags = flags; cpu_clear(cpuid, cpu_present_map); + cpu_clear(cpuid, cpu_possible_map); halt(); } #endif @@ -120,6 +121,7 @@ common_shutdown_1(void *generic_ptr) #ifdef CONFIG_SMP /* Wait for the secondaries to halt. */ cpu_clear(boot_cpuid, cpu_present_map); + cpu_clear(boot_cpuid, cpu_possible_map); while (cpus_weight(cpu_present_map)) barrier(); #endif diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index cf7da10097bb..d953e510f68d 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -70,11 +70,6 @@ enum ipi_message_type { /* Set to a secondary's cpuid when it comes online. */ static int smp_secondary_alive __devinitdata = 0; -/* Which cpus ids came online. */ -cpumask_t cpu_online_map; - -EXPORT_SYMBOL(cpu_online_map); - int smp_num_probed; /* Internal processor count */ int smp_num_cpus = 1; /* Number that came online. */ EXPORT_SYMBOL(smp_num_cpus); @@ -440,6 +435,7 @@ setup_smp(void) ((char *)cpubase + i*hwrpb->processor_size); if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; + cpu_set(i, cpu_possible_map); cpu_set(i, cpu_present_map); cpu->pal_revision = boot_cpu_palrev; } @@ -473,6 +469,7 @@ smp_prepare_cpus(unsigned int max_cpus) /* Nothing to do on a UP box, or when told not to. */ if (smp_num_probed == 1 || max_cpus == 0) { + cpu_possible_map = cpumask_of_cpu(boot_cpuid); cpu_present_map = cpumask_of_cpu(boot_cpuid); printk(KERN_INFO "SMP mode deactivated.\n"); return; diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index c71b0fd7a61f..ab44c164d9d4 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -177,19 +177,19 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } static void -dp264_set_affinity(unsigned int irq, cpumask_t affinity) +dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); - cpu_set_irq_affinity(irq, affinity); + cpu_set_irq_affinity(irq, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } static void -clipper_set_affinity(unsigned int irq, cpumask_t affinity) +clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); - cpu_set_irq_affinity(irq - 16, affinity); + cpu_set_irq_affinity(irq - 16, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); } diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 52c91ccc1648..27f840a4ad3d 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -158,10 +158,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } static void -titan_set_irq_affinity(unsigned int irq, cpumask_t affinity) +titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&titan_irq_lock); - titan_cpu_set_irq_affinity(irq - 16, affinity); + titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); spin_unlock(&titan_irq_lock); } diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index 7fc9860a97d7..c6884ba1d5ed 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -109,11 +109,11 @@ static void gic_unmask_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void gic_set_cpu(unsigned int irq, cpumask_t mask_val) +static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) { void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); unsigned int shift = (irq % 4) * 8; - unsigned int cpu = first_cpu(mask_val); + unsigned int cpu = cpumask_first(mask_val); u32 val; spin_lock(&irq_controller_lock); diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c index 0bbf80625395..e859af349467 100644 --- a/arch/arm/kernel/init_task.c +++ b/arch/arm/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 2f3eb795fa6e..7141cee1fab7 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -174,7 +174,7 @@ static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); spin_lock_irq(&desc->lock); - desc->chip->set_affinity(irq, cpumask_of_cpu(cpu)); + desc->chip->set_affinity(irq, cpumask_of(cpu)); spin_unlock_irq(&desc->lock); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 019237d21622..55fa7ff96a3e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -34,16 +34,6 @@ #include <asm/ptrace.h> /* - * bitmask of present and online CPUs. - * The present bitmask indicates that the CPU is physically present. - * The online bitmask indicates that the CPU is up and running. - */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - -/* * as from 2.5, kernels no longer have an init_tasks structure * so we need some other way of telling a new secondary core * where to place its SVC stack diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index d140eae53ded..1ff1bda0a894 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -178,7 +178,6 @@ static struct clock_event_device clkevt = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 150, - .cpumask = CPU_MASK_CPU0, .set_next_event = clkevt32k_next_event, .set_mode = clkevt32k_mode, }; @@ -206,7 +205,7 @@ void __init at91rm9200_timer_init(void) clkevt.mult = div_sc(AT91_SLOW_CLOCK, NSEC_PER_SEC, clkevt.shift); clkevt.max_delta_ns = clockevent_delta2ns(AT91_ST_ALMV, &clkevt); clkevt.min_delta_ns = clockevent_delta2ns(2, &clkevt) + 1; - clkevt.cpumask = cpumask_of_cpu(0); + clkevt.cpumask = cpumask_of(0); clockevents_register_device(&clkevt); /* register clocksource */ diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 122fd77ed580..b63e1d5f1bad 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -91,7 +91,6 @@ static struct clock_event_device pit_clkevt = { .features = CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 100, - .cpumask = CPU_MASK_CPU0, .set_mode = pit_clkevt_mode, }; @@ -173,6 +172,7 @@ static void __init at91sam926x_pit_init(void) /* Set up and register clockevents */ pit_clkevt.mult = div_sc(pit_rate, NSEC_PER_SEC, pit_clkevt.shift); + pit_clkevt.cpumask = cpumask_of(0); clockevents_register_device(&pit_clkevt); } diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index 3b9a296b5c4b..f8bcd29d17a6 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -322,7 +322,7 @@ static void __init davinci_timer_init(void) clockevent_davinci.min_delta_ns = clockevent_delta2ns(1, &clockevent_davinci); - clockevent_davinci.cpumask = cpumask_of_cpu(0); + clockevent_davinci.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_davinci); } diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c index a11765f5f23b..aff0ebcfa847 100644 --- a/arch/arm/mach-imx/time.c +++ b/arch/arm/mach-imx/time.c @@ -184,7 +184,7 @@ static int __init imx_clockevent_init(unsigned long rate) clockevent_imx.min_delta_ns = clockevent_delta2ns(0xf, &clockevent_imx); - clockevent_imx.cpumask = cpumask_of_cpu(0); + clockevent_imx.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_imx); diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 7766f469456b..f4656d2ac8a8 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -487,7 +487,7 @@ static int __init ixp4xx_clockevent_init(void) clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx); clockevent_ixp4xx.min_delta_ns = clockevent_delta2ns(0xf, &clockevent_ixp4xx); - clockevent_ixp4xx.cpumask = cpumask_of_cpu(0); + clockevent_ixp4xx.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_ixp4xx); return 0; diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 345a14cb73c3..444d9c0f5ca6 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -182,7 +182,7 @@ static void __init msm_timer_init(void) clockevent_delta2ns(0xf0000000 >> clock->shift, ce); /* 4 gets rounded down to 3 */ ce->min_delta_ns = clockevent_delta2ns(4, ce); - ce->cpumask = cpumask_of_cpu(0); + ce->cpumask = cpumask_of(0); cs->mult = clocksource_hz2mult(clock->freq, cs->shift); res = clocksource_register(cs); diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index a63424d083d9..41df69721769 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -173,7 +173,7 @@ static void __init ns9360_timer_init(void) ns9360_clockevent_device.min_delta_ns = clockevent_delta2ns(1, &ns9360_clockevent_device); - ns9360_clockevent_device.cpumask = cpumask_of_cpu(0); + ns9360_clockevent_device.cpumask = cpumask_of(0); clockevents_register_device(&ns9360_clockevent_device); setup_irq(IRQ_NS9360_TIMER0 + TIMER_CLOCKEVENT, diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 2cf7e32bd293..495a32c287b4 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -173,7 +173,7 @@ static __init void omap_init_mpu_timer(unsigned long rate) clockevent_mpu_timer1.min_delta_ns = clockevent_delta2ns(1, &clockevent_mpu_timer1); - clockevent_mpu_timer1.cpumask = cpumask_of_cpu(0); + clockevent_mpu_timer1.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_mpu_timer1); } diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c index 705367ece174..fd3f7396e162 100644 --- a/arch/arm/mach-omap1/timer32k.c +++ b/arch/arm/mach-omap1/timer32k.c @@ -187,7 +187,7 @@ static __init void omap_init_32k_timer(void) clockevent_32k_timer.min_delta_ns = clockevent_delta2ns(1, &clockevent_32k_timer); - clockevent_32k_timer.cpumask = cpumask_of_cpu(0); + clockevent_32k_timer.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_32k_timer); } diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index 589393bedade..ae6036300f60 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -120,7 +120,7 @@ static void __init omap2_gp_clockevent_init(void) clockevent_gpt.min_delta_ns = clockevent_delta2ns(1, &clockevent_gpt); - clockevent_gpt.cpumask = cpumask_of_cpu(0); + clockevent_gpt.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_gpt); } diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 001624158519..95656a72268d 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -122,7 +122,6 @@ static struct clock_event_device ckevt_pxa_osmr0 = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = pxa_osmr0_set_next_event, .set_mode = pxa_osmr0_set_mode, }; @@ -163,6 +162,7 @@ static void __init pxa_timer_init(void) clockevent_delta2ns(0x7fffffff, &ckevt_pxa_osmr0); ckevt_pxa_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; + ckevt_pxa_osmr0.cpumask = cpumask_of(0); cksrc_pxa_oscr0.mult = clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 5f1d55963ced..bd2aa4f16141 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -624,7 +624,7 @@ static struct clock_event_device timer0_clockevent = { .set_mode = timer_set_mode, .set_next_event = timer_set_next_event, .rating = 300, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, }; static void __init realview_clockevents_init(unsigned int timer_irq) diff --git a/arch/arm/mach-realview/localtimer.c b/arch/arm/mach-realview/localtimer.c index 9019ef2e5611..67d6d9cc68b2 100644 --- a/arch/arm/mach-realview/localtimer.c +++ b/arch/arm/mach-realview/localtimer.c @@ -154,7 +154,7 @@ void __cpuinit local_timer_setup(void) clk->set_mode = local_timer_set_mode; clk->set_next_event = local_timer_set_next_event; clk->irq = IRQ_LOCALTIMER; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clk->shift = 20; clk->mult = div_sc(mpcore_timer_rate, NSEC_PER_SEC, clk->shift); clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); @@ -193,7 +193,7 @@ void __cpuinit local_timer_setup(void) clk->rating = 200; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 8c5e727f3b75..711c0295c66f 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -73,7 +73,6 @@ static struct clock_event_device ckevt_sa1100_osmr0 = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 32, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_next_event = sa1100_osmr0_set_next_event, .set_mode = sa1100_osmr0_set_mode, }; @@ -110,6 +109,7 @@ static void __init sa1100_timer_init(void) clockevent_delta2ns(0x7fffffff, &ckevt_sa1100_osmr0); ckevt_sa1100_osmr0.min_delta_ns = clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; + ckevt_sa1100_osmr0.cpumask = cpumask_of(0); cksrc_sa1100_oscr.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index df25aa138509..1c43494f5c42 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -1005,7 +1005,7 @@ static void __init versatile_timer_init(void) timer0_clockevent.min_delta_ns = clockevent_delta2ns(0xf, &timer0_clockevent); - timer0_clockevent.cpumask = cpumask_of_cpu(0); + timer0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&timer0_clockevent); } diff --git a/arch/arm/oprofile/op_model_mpcore.c b/arch/arm/oprofile/op_model_mpcore.c index 4de366e8b4c5..6d6bd5899240 100644 --- a/arch/arm/oprofile/op_model_mpcore.c +++ b/arch/arm/oprofile/op_model_mpcore.c @@ -260,10 +260,10 @@ static void em_stop(void) static void em_route_irq(int irq, unsigned int cpu) { struct irq_desc *desc = irq_desc + irq; - cpumask_t mask = cpumask_of_cpu(cpu); + const struct cpumask *mask = cpumask_of(cpu); spin_lock_irq(&desc->lock); - desc->affinity = mask; + desc->affinity = *mask; desc->chip->set_affinity(irq, mask); spin_unlock_irq(&desc->lock); } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index fd28f5194f71..758a1293bcfa 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -190,7 +190,7 @@ static int __init mxc_clockevent_init(void) clockevent_mxc.min_delta_ns = clockevent_delta2ns(0xff, &clockevent_mxc); - clockevent_mxc.cpumask = cpumask_of_cpu(0); + clockevent_mxc.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_mxc); diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 544d6b327f3a..6fa2923e6dca 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -149,7 +149,6 @@ static struct clock_event_device orion_clkevt = { .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, - .cpumask = CPU_MASK_CPU0, .set_next_event = orion_clkevt_next_event, .set_mode = orion_clkevt_mode, }; @@ -199,5 +198,6 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk) orion_clkevt.mult = div_sc(tclk, NSEC_PER_SEC, orion_clkevt.shift); orion_clkevt.max_delta_ns = clockevent_delta2ns(0xfffffffe, &orion_clkevt); orion_clkevt.min_delta_ns = clockevent_delta2ns(1, &orion_clkevt); + orion_clkevt.cpumask = cpumask_of(0); clockevents_register_device(&orion_clkevt); } diff --git a/arch/avr32/kernel/init_task.c b/arch/avr32/kernel/init_task.c index 44058469c6ec..993d56ee3cf3 100644 --- a/arch/avr32/kernel/init_task.c +++ b/arch/avr32/kernel/init_task.c @@ -13,7 +13,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 283481d74a5b..0ff46bf873b0 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -106,7 +106,6 @@ static struct clock_event_device comparator = { .features = CLOCK_EVT_FEAT_ONESHOT, .shift = 16, .rating = 50, - .cpumask = CPU_MASK_CPU0, .set_next_event = comparator_next_event, .set_mode = comparator_mode, }; @@ -134,6 +133,7 @@ void __init time_init(void) comparator.mult = div_sc(counter_hz, NSEC_PER_SEC, comparator.shift); comparator.max_delta_ns = clockevent_delta2ns((u32)~0, &comparator); comparator.min_delta_ns = clockevent_delta2ns(50, &comparator) + 1; + comparator.cpumask = cpumask_of(0); sysreg_write(COMPARE, 0); timer_irqaction.dev_id = &comparator; diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c index 6bdba7b21109..2c228c020978 100644 --- a/arch/blackfin/kernel/init_task.c +++ b/arch/blackfin/kernel/init_task.c @@ -33,7 +33,6 @@ #include <linux/mqueue.h> #include <linux/fs.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c index e887efc86c29..0ed2badfd746 100644 --- a/arch/blackfin/kernel/time-ts.c +++ b/arch/blackfin/kernel/time-ts.c @@ -162,7 +162,6 @@ static struct clock_event_device clockevent_bfin = { .name = "bfin_core_timer", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .shift = 32, - .cpumask = CPU_MASK_CPU0, .set_next_event = bfin_timer_set_next_event, .set_mode = bfin_timer_set_mode, }; @@ -193,6 +192,7 @@ static int __init bfin_clockevent_init(void) clockevent_bfin.mult = div_sc(timer_clk, NSEC_PER_SEC, clockevent_bfin.shift); clockevent_bfin.max_delta_ns = clockevent_delta2ns(-1, &clockevent_bfin); clockevent_bfin.min_delta_ns = clockevent_delta2ns(100, &clockevent_bfin); + clockevent_bfin.cpumask = cpumask_of(0); clockevents_register_device(&clockevent_bfin); return 0; diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index 173c141ac9ba..295131fee710 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -325,11 +325,11 @@ static void end_crisv32_irq(unsigned int irq) { } -void set_affinity_crisv32_irq(unsigned int irq, cpumask_t dest) +void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) { unsigned long flags; spin_lock_irqsave(&irq_lock, flags); - irq_allocations[irq - FIRST_IRQ].mask = dest; + irq_allocations[irq - FIRST_IRQ].mask = *dest; spin_unlock_irqrestore(&irq_lock, flags); } diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index 52e16c6436f9..9dac17334640 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -29,11 +29,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED}; /* CPU masks */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); cpumask_t phys_cpu_present_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); EXPORT_SYMBOL(phys_cpu_present_map); /* Variables used during SMP boot */ diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h index dba33aba3e95..c615a06dd757 100644 --- a/arch/cris/include/asm/smp.h +++ b/arch/cris/include/asm/smp.h @@ -4,7 +4,6 @@ #include <linux/cpumask.h> extern cpumask_t phys_cpu_present_map; -extern cpumask_t cpu_possible_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 5933656db5a2..60816e876455 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -37,7 +37,6 @@ * setup. */ -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/frv/kernel/init_task.c b/arch/frv/kernel/init_task.c index e2198815b630..29429a8b7f6a 100644 --- a/arch/frv/kernel/init_task.c +++ b/arch/frv/kernel/init_task.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/h8300/kernel/init_task.c b/arch/h8300/kernel/init_task.c index 93a4899e46c2..cb5dc552da97 100644 --- a/arch/h8300/kernel/init_task.c +++ b/arch/h8300/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index c2f58ff364e7..cc0a3182db3c 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -22,7 +22,7 @@ hpsim_irq_noop (unsigned int irq) } static void -hpsim_set_affinity_noop (unsigned int a, cpumask_t b) +hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) { } diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index f38472ac2267..68aa6da807c1 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h @@ -166,8 +166,6 @@ struct saved_vpd { }; struct kvm_regs { - char *saved_guest; - char *saved_stack; struct saved_vpd vpd; /*Arch-regs*/ int mp_state; @@ -200,6 +198,10 @@ struct kvm_regs { unsigned long fp_psr; /*used for lazy float register */ unsigned long saved_gp; /*for phycial emulation */ + + union context saved_guest; + + unsigned long reserved[64]; /* for future use */ }; struct kvm_sregs { diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index c60d324da540..0560f3fae538 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -23,17 +23,6 @@ #ifndef __ASM_KVM_HOST_H #define __ASM_KVM_HOST_H - -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/kvm_types.h> - -#include <asm/pal.h> -#include <asm/sal.h> - -#define KVM_MAX_VCPUS 4 #define KVM_MEMORY_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 @@ -50,70 +39,132 @@ #define EXIT_REASON_EXTERNAL_INTERRUPT 6 #define EXIT_REASON_IPI 7 #define EXIT_REASON_PTC_G 8 +#define EXIT_REASON_DEBUG 20 /*Define vmm address space and vm data space.*/ -#define KVM_VMM_SIZE (16UL<<20) +#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20) #define KVM_VMM_SHIFT 24 -#define KVM_VMM_BASE 0xD000000000000000UL -#define VMM_SIZE (8UL<<20) +#define KVM_VMM_BASE 0xD000000000000000 +#define VMM_SIZE (__IA64_UL_CONST(8)<<20) /* * Define vm_buffer, used by PAL Services, base address. - * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M + * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M */ #define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE) -#define KVM_VM_BUFFER_SIZE (8UL<<20) - -/*Define Virtual machine data layout.*/ -#define KVM_VM_DATA_SHIFT 24 -#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT) -#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE) - - -#define KVM_P2M_BASE KVM_VM_DATA_BASE -#define KVM_P2M_OFS 0 -#define KVM_P2M_SIZE (8UL << 20) - -#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE) -#define KVM_VHPT_OFS KVM_P2M_SIZE -#define KVM_VHPT_BLOCK_SIZE (2UL << 20) -#define VHPT_SHIFT 18 -#define VHPT_SIZE (1UL << VHPT_SHIFT) -#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5)) - -#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE) -#define KVM_VTLB_BLOCK_SIZE (1UL<<20) -#define VTLB_SHIFT 17 -#define VTLB_SIZE (1UL<<VTLB_SHIFT) -#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5)) - -#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE) -#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE) -#define KVM_VPD_BLOCK_SIZE (2UL<<20) -#define VPD_SHIFT 16 -#define VPD_SIZE (1UL<<VPD_SHIFT) - -#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE) -#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE) -#define KVM_VCPU_BLOCK_SIZE (2UL<<20) -#define VCPU_SHIFT 18 -#define VCPU_SIZE (1UL<<VCPU_SHIFT) -#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE - -#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE) -#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE) -#define KVM_VM_BLOCK_SIZE (1UL<<19) - -#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE) -#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE) -#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19) - -/* Get vpd, vhpt, tlb, vcpu, base*/ -#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE) -#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE) -#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE) -#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE) +#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20) + +/* + * kvm guest's data area looks as follow: + * + * +----------------------+ ------- KVM_VM_DATA_SIZE + * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET + * | | | / | + * | .......... | | /vcpu's struct&stack | + * | .......... | | /---------------------|---- 0 + * | vcpu[5]'s data | | / vpd | + * | vcpu[4]'s data | |/-----------------------| + * | vcpu[3]'s data | / vtlb | + * | vcpu[2]'s data | /|------------------------| + * | vcpu[1]'s data |/ | vhpt | + * | vcpu[0]'s data |____________________________| + * +----------------------+ | + * | memory dirty log | | + * +----------------------+ | + * | vm's data struct | | + * +----------------------+ | + * | | | + * | | | + * | | | + * | | | + * | | | + * | | | + * | | | + * | vm's p2m table | | + * | | | + * | | | + * | | | | + * vm's data->| | | | + * +----------------------+ ------- 0 + * To support large memory, needs to increase the size of p2m. + * To support more vcpus, needs to ensure it has enough space to + * hold vcpus' data. + */ + +#define KVM_VM_DATA_SHIFT 26 +#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT) +#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE) + +#define KVM_P2M_BASE KVM_VM_DATA_BASE +#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20) + +#define VHPT_SHIFT 16 +#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT) +#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5)) + +#define VTLB_SHIFT 16 +#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT) +#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5)) + +#define VPD_SHIFT 16 +#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT) + +#define VCPU_STRUCT_SHIFT 16 +#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT) + +#define KVM_STK_OFFSET VCPU_STRUCT_SIZE + +#define KVM_VM_STRUCT_SHIFT 19 +#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT) + +#define KVM_MEM_DIRY_LOG_SHIFT 19 +#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT) + +#ifndef __ASSEMBLY__ + +/*Define the max vcpus and memory for Guests.*/ +#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\ + KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data) +#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT) + +#define VMM_LOG_LEN 256 + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/kvm_types.h> + +#include <asm/pal.h> +#include <asm/sal.h> +#include <asm/page.h> + +struct kvm_vcpu_data { + char vcpu_vhpt[VHPT_SIZE]; + char vcpu_vtlb[VTLB_SIZE]; + char vcpu_vpd[VPD_SIZE]; + char vcpu_struct[VCPU_STRUCT_SIZE]; +}; + +struct kvm_vm_data { + char kvm_p2m[KVM_P2M_SIZE]; + char kvm_vm_struct[KVM_VM_STRUCT_SIZE]; + char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE]; + struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS]; +}; + +#define VCPU_BASE(n) KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, vcpu_data[n]) +#define VM_BASE KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, kvm_vm_struct) +#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \ + offsetof(struct kvm_vm_data, kvm_mem_dirty_log) + +#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt)) +#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb)) +#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd)) +#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \ + offsetof(struct kvm_vcpu_data, vcpu_struct)) /*IO section definitions*/ #define IOREQ_READ 1 @@ -389,6 +440,7 @@ struct kvm_vcpu_arch { unsigned long opcode; unsigned long cause; + char log_buf[VMM_LOG_LEN]; union context host; union context guest; }; @@ -403,14 +455,13 @@ struct kvm_sal_data { }; struct kvm_arch { + spinlock_t dirty_log_lock; + unsigned long vm_base; unsigned long metaphysical_rr0; unsigned long metaphysical_rr4; unsigned long vmm_init_rr; - unsigned long vhpt_base; - unsigned long vtlb_base; - unsigned long vpd_base; - spinlock_t dirty_log_lock; + struct kvm_ioapic *vioapic; struct kvm_vm_stat stat; struct kvm_sal_data rdv_sal_data; @@ -512,7 +563,7 @@ struct kvm_pt_regs { static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v) { - return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1; + return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1; } typedef int kvm_vmm_entry(void); @@ -531,5 +582,6 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); void kvm_sal_emul(struct kvm_vcpu *vcpu); static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} +#endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h index 12d96e0cd513..21c402365d0e 100644 --- a/arch/ia64/include/asm/smp.h +++ b/arch/ia64/include/asm/smp.h @@ -57,7 +57,6 @@ extern struct smp_boot_data { extern char no_int_routing __devinitdata; -extern cpumask_t cpu_online_map; extern cpumask_t cpu_core_map[NR_CPUS]; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); extern int smp_num_siblings; diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 35bcb641c9e5..a3cc9f65f954 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -55,7 +55,6 @@ void build_cpu_to_node_map(void); #define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -80,7 +79,6 @@ void build_cpu_to_node_map(void); /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index 9d7e1c66faf4..5b0e830c6f33 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -17,7 +17,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 5c4674ae8aea..c8adecd5b416 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -330,25 +330,25 @@ unmask_irq (unsigned int irq) static void -iosapic_set_affinity (unsigned int irq, cpumask_t mask) +iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) { #ifdef CONFIG_SMP u32 high32, low32; - int dest, rte_index; + int cpu, dest, rte_index; int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; struct iosapic_rte_info *rte; struct iosapic *iosapic; irq &= (~IA64_IRQ_REDIRECTED); - cpus_and(mask, mask, cpu_online_map); - if (cpus_empty(mask)) + cpu = cpumask_first_and(cpu_online_mask, mask); + if (cpu >= nr_cpu_ids) return; - if (irq_prepare_move(irq, first_cpu(mask))) + if (irq_prepare_move(irq, cpu)) return; - dest = cpu_physical_id(first_cpu(mask)); + dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) return; /* not an IOSAPIC interrupt */ diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 7fd18f54c056..0b6db53fedcf 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -133,7 +133,6 @@ unsigned int vectors_in_migration[NR_IRQS]; */ static void migrate_irqs(void) { - cpumask_t mask; irq_desc_t *desc; int irq, new_cpu; @@ -152,15 +151,14 @@ static void migrate_irqs(void) if (desc->status == IRQ_PER_CPU) continue; - cpus_and(mask, irq_desc[irq].affinity, cpu_online_map); - if (any_online_cpu(mask) == NR_CPUS) { + if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) + >= nr_cpu_ids) { /* * Save it for phase 2 processing */ vectors_in_migration[irq] = irq; new_cpu = any_online_cpu(cpu_online_map); - mask = cpumask_of_cpu(new_cpu); /* * Al three are essential, currently WARN_ON.. maybe panic? @@ -168,7 +166,8 @@ static void migrate_irqs(void) if (desc->chip && desc->chip->disable && desc->chip->enable && desc->chip->set_affinity) { desc->chip->disable(irq); - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, + cpumask_of(new_cpu)); desc->chip->enable(irq); } else { WARN_ON((!(desc->chip) || !(desc->chip->disable) || diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 702a09c13238..890339339035 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -49,11 +49,12 @@ static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) +static void ia64_set_msi_irq_affinity(unsigned int irq, + const cpumask_t *cpu_mask) { struct msi_msg msg; u32 addr, data; - int cpu = first_cpu(cpu_mask); + int cpu = first_cpu(*cpu_mask); if (!cpu_online(cpu)) return; @@ -166,12 +167,11 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; - int cpu = first_cpu(mask); - + int cpu = cpumask_first(mask); if (!cpu_online(cpu)) return; @@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = mask; + irq_desc[irq].affinity = *mask; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 1dcbb85fc4ee..11463994a7d5 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -131,12 +131,6 @@ struct task_struct *task_for_booting_cpu; */ DEFINE_PER_CPU(int, cpu_state); -/* Bitmasks of currently online, and possible CPUs */ -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); -cpumask_t cpu_possible_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_possible_map); - cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); @@ -688,7 +682,7 @@ int migrate_platform_irqs(unsigned int cpu) { int new_cpei_cpu; irq_desc_t *desc = NULL; - cpumask_t mask; + const struct cpumask *mask; int retval = 0; /* @@ -701,7 +695,7 @@ int migrate_platform_irqs(unsigned int cpu) * Now re-target the CPEI to a different processor */ new_cpei_cpu = any_online_cpu(cpu_online_map); - mask = cpumask_of_cpu(new_cpei_cpu); + mask = cpumask_of(new_cpei_cpu); set_cpei_target_cpu(new_cpei_cpu); desc = irq_desc + ia64_cpe_irq; /* diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 65c10a42c88f..f0ebb342409d 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) now = ia64_get_itc(); delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); - account_system_time(prev, 0, delta_stime); - account_system_time_scaled(prev, delta_stime); + if (idle_task(smp_processor_id()) != prev) + account_system_time(prev, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); if (pi->ac_utime) { delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime); - account_user_time_scaled(prev, delta_utime); + account_user_time(prev, delta_utime, delta_utime); } pi->ac_stamp = ni->ac_stamp = now; @@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk) now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - account_system_time(tsk, 0, delta_stime); - account_system_time_scaled(tsk, delta_stime); + if (irq_count() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); ti->ac_stime = 0; ti->ac_stamp = now; @@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (ti->ac_utime) { delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime); - account_user_time_scaled(p, delta_utime); + account_user_time(p, delta_utime, delta_utime); ti->ac_utime = 0; } } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index c75b914f2d6b..a8d61a3e9a94 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) cpumask_t shared_cpu_map; cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); - len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); + len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map); len += sprintf(buf+len, "\n"); return len; } diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 92cef66ca268..76464dc312e6 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o + vtlb.o process.o kvm_lib.o #Add link memcpy and memset to avoid possible structure assignment error kvm-intel-objs += memcpy.o memset.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c index 4e3dc13a619c..0c3564a7a033 100644 --- a/arch/ia64/kvm/asm-offsets.c +++ b/arch/ia64/kvm/asm-offsets.c @@ -24,19 +24,10 @@ #include <linux/autoconf.h> #include <linux/kvm_host.h> +#include <linux/kbuild.h> #include "vcpu.h" -#define task_struct kvm_vcpu - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : :) - -#define OFFSET(_sym, _str, _mem) \ - DEFINE(_sym, offsetof(_str, _mem)); - void foo(void) { DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index af1464f7a6ad..0f5ebd948437 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -180,7 +180,6 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_IRQCHIP: - case KVM_CAP_USER_MEMORY: case KVM_CAP_MP_STATE: r = 1; @@ -439,7 +438,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) expires = div64_u64(itc_diff, cyc_per_usec); kt = ktime_set(0, 1000 * expires); - down_read(&vcpu->kvm->slots_lock); vcpu->arch.ht_active = 1; hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); @@ -452,7 +450,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; - up_read(&vcpu->kvm->slots_lock); if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) return -EINTR; @@ -476,6 +473,13 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu, return 1; } +static int handle_vcpu_debug(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + printk("VMM: %s", vcpu->arch.log_buf); + return 1; +} + static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = { [EXIT_REASON_VM_PANIC] = handle_vm_error, @@ -487,6 +491,7 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_IPI] = handle_ipi, [EXIT_REASON_PTC_G] = handle_global_purge, + [EXIT_REASON_DEBUG] = handle_vcpu_debug, }; @@ -698,27 +703,24 @@ out: return r; } -/* - * Allocate 16M memory for every vm to hold its specific data. - * Its memory map is defined in kvm_host.h. - */ static struct kvm *kvm_alloc_kvm(void) { struct kvm *kvm; uint64_t vm_base; + BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE); + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); if (!vm_base) return ERR_PTR(-ENOMEM); - printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); - /* Zero all pages before use! */ memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); - - kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm = (struct kvm *)(vm_base + + offsetof(struct kvm_vm_data, kvm_vm_struct)); kvm->arch.vm_base = vm_base; + printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base); return kvm; } @@ -760,21 +762,12 @@ static void kvm_build_io_pmt(struct kvm *kvm) static void kvm_init_vm(struct kvm *kvm) { - long vm_base; - BUG_ON(!kvm); kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; kvm->arch.vmm_init_rr = VMM_INIT_RR; - vm_base = kvm->arch.vm_base; - if (vm_base) { - kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; - kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; - kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; - } - /* *Fill P2M entries for MMIO/IO ranges */ @@ -838,9 +831,8 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - int i; struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - int r; + int i; vcpu_load(vcpu); @@ -857,18 +849,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vpd->vpr = regs->vpd.vpr; - r = -EFAULT; - r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, - sizeof(union context)); - if (r) - goto out; - r = copy_from_user(vcpu + 1, regs->saved_stack + - sizeof(struct kvm_vcpu), - IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); - if (r) - goto out; - vcpu->arch.exit_data = - ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + memcpy(&vcpu->arch.guest, ®s->saved_guest, sizeof(union context)); RESTORE_REGS(mp_state); RESTORE_REGS(vmm_rr); @@ -902,9 +883,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) set_bit(KVM_REQ_RESUME, &vcpu->requests); vcpu_put(vcpu); - r = 0; -out: - return r; + + return 0; } long kvm_arch_vm_ioctl(struct file *filp, @@ -1166,10 +1146,11 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) /*Set entry address for first run.*/ regs->cr_iip = PALE_RESET_ENTRY; - /*Initilize itc offset for vcpus*/ + /*Initialize itc offset for vcpus*/ itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); - for (i = 0; i < MAX_VCPU_NUM; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + for (i = 0; i < KVM_MAX_VCPUS; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + + sizeof(struct kvm_vcpu_data) * i); v->arch.itc_offset = itc_offset; v->arch.last_itc = 0; } @@ -1183,7 +1164,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.apic->vcpu = vcpu; p_ctx->gr[1] = 0; - p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET); p_ctx->gr[13] = (unsigned long)vmm_vcpu; p_ctx->psr = 0x1008522000UL; p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ @@ -1218,12 +1199,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.hlt_timer.function = hlt_timer_fn; vcpu->arch.last_run_cpu = -1; - vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id); vcpu->arch.vsa_base = kvm_vsa_base; vcpu->arch.__gp = kvm_vmm_gp; vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); - vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); - vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id); init_ptce_info(vcpu); r = 0; @@ -1273,12 +1254,22 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int r; int cpu; + BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2); + + r = -EINVAL; + if (id >= KVM_MAX_VCPUS) { + printk(KERN_ERR"kvm: Can't configure vcpus > %ld", + KVM_MAX_VCPUS); + goto fail; + } + r = -ENOMEM; if (!vm_base) { printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); goto fail; } - vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data, + vcpu_data[id].vcpu_struct)); vcpu->kvm = kvm; cpu = get_cpu(); @@ -1374,9 +1365,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - int i; - int r; struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int i; + vcpu_load(vcpu); for (i = 0; i < 16; i++) { @@ -1391,14 +1382,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->vpd.vpsr = vpd->vpsr; regs->vpd.vpr = vpd->vpr; - r = -EFAULT; - r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, - sizeof(union context)); - if (r) - goto out; - r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); - if (r) - goto out; + memcpy(®s->saved_guest, &vcpu->arch.guest, sizeof(union context)); + SAVE_REGS(mp_state); SAVE_REGS(vmm_rr); memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); @@ -1426,10 +1411,9 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) SAVE_REGS(metaphysical_saved_rr4); SAVE_REGS(fp_psr); SAVE_REGS(saved_gp); + vcpu_put(vcpu); - r = 0; -out: - return r; + return 0; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) @@ -1457,6 +1441,9 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; unsigned long base_gfn = memslot->base_gfn; + if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) + return -ENOMEM; + for (i = 0; i < npages; i++) { pfn = gfn_to_pfn(kvm, base_gfn + i); if (!kvm_is_mmio_pfn(pfn)) { @@ -1631,8 +1618,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int r, i; long n, base; - unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS - + KVM_MEM_DIRTY_LOG_OFS); + unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c new file mode 100644 index 000000000000..a85cb611ecd7 --- /dev/null +++ b/arch/ia64/kvm/kvm_lib.c @@ -0,0 +1,15 @@ +/* + * kvm_lib.c: Compile some libraries for kvm-intel module. + * + * Just include kernel's library, and disable symbols export. + * Copyright (C) 2008, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#undef CONFIG_MODULES +#include "../../../lib/vsprintf.c" +#include "../../../lib/ctype.c" diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h index 2cc41d17cf99..b2bcaa2787aa 100644 --- a/arch/ia64/kvm/kvm_minstate.h +++ b/arch/ia64/kvm/kvm_minstate.h @@ -24,6 +24,8 @@ #include <asm/asmmacro.h> #include <asm/types.h> #include <asm/kregs.h> +#include <asm/kvm_host.h> + #include "asm-offsets.h" #define KVM_MINSTATE_START_SAVE_MIN \ @@ -33,7 +35,7 @@ addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ ;; \ lfetch.fault.excl.nt1 [r22]; \ - addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \ mov r23 = ar.bspstore; /* save ar.bspstore */ \ ;; \ mov ar.bspstore = r22; /* switch to kernel RBS */\ diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h index e585c4607344..dd979e00b574 100644 --- a/arch/ia64/kvm/misc.h +++ b/arch/ia64/kvm/misc.h @@ -27,7 +27,8 @@ */ static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) { - return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); + return (uint64_t *)(kvm->arch.vm_base + + offsetof(struct kvm_vm_data, kvm_p2m)); } static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c index 7f1a858bc69f..21f63fffc379 100644 --- a/arch/ia64/kvm/mmio.c +++ b/arch/ia64/kvm/mmio.c @@ -66,31 +66,25 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr, switch (addr) { case PIB_OFST_INTA: - /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ - panic_vm(v); + panic_vm(v, "Undefined write on PIB INTA\n"); break; case PIB_OFST_XTP: if (length == 1) { vlsapic_write_xtp(v, val); } else { - /*panic_domain(NULL, - "Undefined write on PIB XTP\n");*/ - panic_vm(v); + panic_vm(v, "Undefined write on PIB XTP\n"); } break; default: if (PIB_LOW_HALF(addr)) { - /*lower half */ + /*Lower half */ if (length != 8) - /*panic_domain(NULL, - "Can't LHF write with size %ld!\n", - length);*/ - panic_vm(v); + panic_vm(v, "Can't LHF write with size %ld!\n", + length); else vlsapic_write_ipi(v, addr, val); - } else { /* upper half - printk("IPI-UHF write %lx\n",addr);*/ - panic_vm(v); + } else { /*Upper half */ + panic_vm(v, "IPI-UHF write %lx\n", addr); } break; } @@ -108,22 +102,18 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, if (length == 1) /* 1 byte load */ ; /* There is no i8259, there is no INTA access*/ else - /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ - panic_vm(v); + panic_vm(v, "Undefined read on PIB INTA\n"); break; case PIB_OFST_XTP: if (length == 1) { result = VLSAPIC_XTP(v); - /* printk("read xtp %lx\n", result); */ } else { - /*panic_domain(NULL, - "Undefined read on PIB XTP\n");*/ - panic_vm(v); + panic_vm(v, "Undefined read on PIB XTP\n"); } break; default: - panic_vm(v); + panic_vm(v, "Undefined addr access for lsapic!\n"); break; } return result; @@ -162,7 +152,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, /* it's necessary to ensure zero extending */ *dest = p->u.ioreq.data & (~0UL >> (64-(s*8))); } else - panic_vm(vcpu); + panic_vm(vcpu, "Unhandled mmio access returned!\n"); out: local_irq_restore(psr); return ; @@ -324,7 +314,9 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) return; } else { inst_type = -1; - panic_vm(vcpu); + panic_vm(vcpu, "Unsupported MMIO access instruction! \ + Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", + bundle.i64[0], bundle.i64[1]); } size = 1 << size; @@ -335,7 +327,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) if (inst_type == SL_INTEGER) vcpu_set_gr(vcpu, inst.M1.r1, data, 0); else - panic_vm(vcpu); + panic_vm(vcpu, "Unsupported instruction type!\n"); } vcpu_increment_iip(vcpu); diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index 800817307b7b..552d07724207 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -527,7 +527,8 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim, vector = vec2off[vec]; if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { - panic_vm(vcpu); + panic_vm(vcpu, "Interruption with vector :0x%lx occurs " + "with psr.ic = 0\n", vector); return; } @@ -586,7 +587,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu) vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); } else - panic_vm(vcpu); + panic_vm(vcpu, "Mis-set for exit reason!\n"); } static void set_sal_call_data(struct kvm_vcpu *vcpu) @@ -614,7 +615,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu) vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); } else - panic_vm(vcpu); + panic_vm(vcpu, "Mis-set for exit reason!\n"); } void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, @@ -680,7 +681,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu) vpsr = VCPU(vcpu, vpsr); isr = vpsr & IA64_PSR_RI; if (!(vpsr & IA64_PSR_IC)) - panic_vm(vcpu); + panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n"); reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ } @@ -941,8 +942,20 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu) ia64_set_pta(vcpu->arch.vhpt.pta.val); } +static void vmm_sanity_check(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) { + panic_vm(vcpu, "Failed to do vmm sanity check," + "it maybe caused by crashed vmm!!\n\n"); + } +} + static void kvm_do_resume_op(struct kvm_vcpu *vcpu) { + vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { vcpu_do_resume(vcpu); return; @@ -968,3 +981,11 @@ void vmm_transition(struct kvm_vcpu *vcpu) 1, 0, 0, 0, 0, 0); kvm_do_resume_op(vcpu); } + +void vmm_panic_handler(u64 vec) +{ + struct kvm_vcpu *vcpu = current_vcpu; + vmm_sanity = 0; + panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n", + vec2off[vec]); +} diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index e44027ce5667..ecd526b55323 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -816,8 +816,9 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) unsigned long vitv = VCPU(vcpu, itv); if (vcpu->vcpu_id == 0) { - for (i = 0; i < MAX_VCPU_NUM; i++) { - v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + for (i = 0; i < KVM_MAX_VCPUS; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + + sizeof(struct kvm_vcpu_data) * i); VMX(v, itc_offset) = itc_offset; VMX(v, last_itc) = 0; } @@ -1650,7 +1651,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) * Otherwise panic */ if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) - panic_vm(vcpu); + panic_vm(vcpu, "Only support guests with vpsr.pk =0 \ + & vpsr.is=0\n"); /* * For those IA64_PSR bits: id/da/dd/ss/ed/ia @@ -2103,7 +2105,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu) if (is_physical_mode(vcpu)) { if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) - panic_vm(vcpu); + panic_vm(vcpu, "Machine Status conflicts!\n"); ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); ia64_dv_serialize_data(); @@ -2152,10 +2154,70 @@ int vmm_entry(void) return 0; } -void panic_vm(struct kvm_vcpu *v) -{ +static void kvm_show_registers(struct kvm_pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + struct kvm_vcpu *vcpu = current_vcpu; + if (vcpu != NULL) + printk("vcpu 0x%p vcpu %d\n", + vcpu, vcpu->vcpu_id); + + printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n", + regs->cr_ipsr, regs->cr_ifs, ip); + + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bspstore: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, + regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", + regs->f10.u.bits[1], regs->f10.u.bits[0], + regs->f11.u.bits[1], regs->f11.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, + regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, + regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, + regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, + regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, + regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, + regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, + regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, + regs->r27, regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, + regs->r30, regs->r31); + +} + +void panic_vm(struct kvm_vcpu *v, const char *fmt, ...) +{ + va_list args; + char buf[256]; + + struct kvm_pt_regs *regs = vcpu_regs(v); struct exit_ctl_data *p = &v->arch.exit_data; - + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + printk(buf); + kvm_show_registers(regs); p->exit_reason = EXIT_REASON_VM_PANIC; vmm_transition(v); /*Never to return*/ diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h index e9b2a4e121c0..b2f12a562bdf 100644 --- a/arch/ia64/kvm/vcpu.h +++ b/arch/ia64/kvm/vcpu.h @@ -737,9 +737,12 @@ void kvm_init_vtlb(struct kvm_vcpu *v); void kvm_init_vhpt(struct kvm_vcpu *v); void thash_init(struct thash_cb *hcb, u64 sz); -void panic_vm(struct kvm_vcpu *v); +void panic_vm(struct kvm_vcpu *v, const char *fmt, ...); extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); + +extern long vmm_sanity; + #endif #endif /* __VCPU_H__ */ diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index 2275bf4e681a..9eee5c04bacc 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c @@ -20,6 +20,7 @@ */ +#include<linux/kernel.h> #include<linux/module.h> #include<asm/fpswa.h> @@ -31,6 +32,8 @@ MODULE_LICENSE("GPL"); extern char kvm_ia64_ivt; extern fpswa_interface_t *vmm_fpswa_interface; +long vmm_sanity = 1; + struct kvm_vmm_info vmm_info = { .module = THIS_MODULE, .vmm_entry = vmm_entry, @@ -62,5 +65,31 @@ void vmm_spin_unlock(spinlock_t *lock) { _vmm_raw_spin_unlock(lock); } + +static void vcpu_debug_exit(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_DEBUG; + vmm_transition(vcpu); + local_irq_restore(psr); +} + +asmlinkage int printk(const char *fmt, ...) +{ + struct kvm_vcpu *vcpu = current_vcpu; + va_list args; + int r; + + memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN); + va_start(args, fmt); + r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args); + va_end(args); + vcpu_debug_exit(vcpu); + return r; +} + module_init(kvm_vmm_init) module_exit(kvm_vmm_exit) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S index c1d7251a1480..3ef1a017a318 100644 --- a/arch/ia64/kvm/vmm_ivt.S +++ b/arch/ia64/kvm/vmm_ivt.S @@ -1,5 +1,5 @@ /* - * /ia64/kvm_ivt.S + * arch/ia64/kvm/vmm_ivt.S * * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co * Stephane Eranian <eranian@hpl.hp.com> @@ -70,32 +70,39 @@ # define PSR_DEFAULT_BITS 0 #endif - #define KVM_FAULT(n) \ - kvm_fault_##n:; \ - mov r19=n;; \ - br.sptk.many kvm_fault_##n; \ - ;; \ - + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_vmm_panic; \ + ;; \ #define KVM_REFLECT(n) \ - mov r31=pr; \ - mov r19=n; /* prepare to save predicates */ \ - mov r29=cr.ipsr; \ - ;; \ - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ -(p7)br.sptk.many kvm_dispatch_reflection; \ - br.sptk.many kvm_panic; \ - - -GLOBAL_ENTRY(kvm_panic) - br.sptk.many kvm_panic - ;; -END(kvm_panic) - - - - + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7) br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_vmm_panic; \ + +GLOBAL_ENTRY(kvm_vmm_panic) + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + br.call.sptk.many b6=vmm_panic_handler; +END(kvm_vmm_panic) .section .text.ivt,"ax" @@ -105,308 +112,307 @@ kvm_ia64_ivt: /////////////////////////////////////////////////////////////// // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) ENTRY(kvm_vhpt_miss) - KVM_FAULT(0) + KVM_FAULT(0) END(kvm_vhpt_miss) - .org kvm_ia64_ivt+0x400 //////////////////////////////////////////////////////////////// // 0x0400 Entry 1 (size 64 bundles) ITLB (21) ENTRY(kvm_itlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; - (p6) br.sptk kvm_alt_itlb_miss - mov r19 = 1 - br.sptk kvm_itlb_miss_dispatch - KVM_FAULT(1); + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); END(kvm_itlb_miss) .org kvm_ia64_ivt+0x0800 ////////////////////////////////////////////////////////////////// // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) ENTRY(kvm_dtlb_miss) - mov r31 = pr - mov r29=cr.ipsr; - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT; -(p6)br.sptk kvm_alt_dtlb_miss - br.sptk kvm_dtlb_miss_dispatch + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch END(kvm_dtlb_miss) .org kvm_ia64_ivt+0x0c00 //////////////////////////////////////////////////////////////////// // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) ENTRY(kvm_alt_itlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - mov r24=cr.ipsr - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r17,r19 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.i r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_itlb_miss) .org kvm_ia64_ivt+0x1000 ///////////////////////////////////////////////////////////////////// // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) ENTRY(kvm_alt_dtlb_miss) - mov r16=cr.ifa // get address that caused the TLB miss - ;; - movl r17=PAGE_KERNEL - movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) - mov r24=cr.ipsr - ;; - and r19=r19,r16 // clear ed, reserved bits, and PTE control bits - ;; - or r19=r19,r17 // insert PTE control bits into r19 - ;; - movl r20=IA64_GRANULE_SHIFT<<2 - ;; - mov cr.itir=r20 - ;; - itc.d r19 // insert the TLB entry - mov pr=r31,-1 - rfi + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi END(kvm_alt_dtlb_miss) .org kvm_ia64_ivt+0x1400 ////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(kvm_nested_dtlb_miss) - KVM_FAULT(5) + KVM_FAULT(5) END(kvm_nested_dtlb_miss) .org kvm_ia64_ivt+0x1800 ///////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(kvm_ikey_miss) - KVM_REFLECT(6) + KVM_REFLECT(6) END(kvm_ikey_miss) .org kvm_ia64_ivt+0x1c00 ///////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(kvm_dkey_miss) - KVM_REFLECT(7) + KVM_REFLECT(7) END(kvm_dkey_miss) .org kvm_ia64_ivt+0x2000 //////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(kvm_dirty_bit) - KVM_REFLECT(8) + KVM_REFLECT(8) END(kvm_dirty_bit) .org kvm_ia64_ivt+0x2400 //////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(kvm_iaccess_bit) - KVM_REFLECT(9) + KVM_REFLECT(9) END(kvm_iaccess_bit) .org kvm_ia64_ivt+0x2800 /////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(kvm_daccess_bit) - KVM_REFLECT(10) + KVM_REFLECT(10) END(kvm_daccess_bit) .org kvm_ia64_ivt+0x2c00 ///////////////////////////////////////////////////////////////// // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) ENTRY(kvm_break_fault) - mov r31=pr - mov r19=11 - mov r29=cr.ipsr - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) - mov out0=cr.ifa - mov out2=cr.isr // FIXME: pity to make this slow access twice - mov out3=cr.iim // FIXME: pity to make this slow access twice - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15)ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out1=16,sp - br.call.sptk.many b6=kvm_ia64_handle_break - ;; + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; END(kvm_break_fault) .org kvm_ia64_ivt+0x3000 ///////////////////////////////////////////////////////////////// // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) ENTRY(kvm_interrupt) - mov r31=pr // prepare to save predicates - mov r19=12 - mov r29=cr.ipsr - ;; - tbit.z p6,p7=r29,IA64_PSR_VM_BIT - tbit.z p0,p15=r29,IA64_PSR_I_BIT - ;; -(p7) br.sptk kvm_dispatch_interrupt - ;; - mov r27=ar.rsc /* M */ - mov r20=r1 /* A */ - mov r25=ar.unat /* M */ - mov r26=ar.pfs /* I */ - mov r28=cr.iip /* M */ - cover /* B (or nothing) */ - ;; - mov r1=sp - ;; - invala /* M */ - mov r30=cr.ifs - ;; - addl r1=-VMM_PT_REGS_SIZE,r1 - ;; - adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ - adds r16=PT(CR_IPSR),r1 - ;; - lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES - st8 [r16]=r29 /* save cr.ipsr */ - ;; - lfetch.fault.excl.nt1 [r17] - mov r29=b0 - ;; - adds r16=PT(R8),r1 /* initialize first base pointer */ - adds r17=PT(R9),r1 /* initialize second base pointer */ - mov r18=r0 /* make sure r18 isn't NaT */ - ;; + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; .mem.offset 0,0; st8.spill [r16]=r8,16 .mem.offset 8,0; st8.spill [r17]=r9,16 ;; .mem.offset 0,0; st8.spill [r16]=r10,24 .mem.offset 8,0; st8.spill [r17]=r11,24 ;; - st8 [r16]=r28,16 /* save cr.iip */ - st8 [r17]=r30,16 /* save cr.ifs */ - mov r8=ar.fpsr /* M */ - mov r9=ar.csd - mov r10=ar.ssd - movl r11=FPSR_DEFAULT /* L-unit */ - ;; - st8 [r16]=r25,16 /* save ar.unat */ - st8 [r17]=r26,16 /* save ar.pfs */ - shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ - ;; - st8 [r16]=r27,16 /* save ar.rsc */ - adds r17=16,r17 /* skip over ar_rnat field */ - ;; - st8 [r17]=r31,16 /* save predicates */ - adds r16=16,r16 /* skip over ar_bspstore field */ - ;; - st8 [r16]=r29,16 /* save b0 */ - st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ - ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; .mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ .mem.offset 8,0; st8.spill [r17]=r12,16 - adds r12=-16,r1 - /* switch to kernel memory stack (with 16 bytes of scratch) */ - ;; + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; .mem.offset 0,0; st8.spill [r16]=r13,16 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - ;; + ;; .mem.offset 0,0; st8.spill [r16]=r15,16 .mem.offset 8,0; st8.spill [r17]=r14,16 - dep r14=-1,r0,60,4 - ;; + dep r14=-1,r0,60,4 + ;; .mem.offset 0,0; st8.spill [r16]=r2,16 .mem.offset 8,0; st8.spill [r17]=r3,16 - adds r2=VMM_PT_REGS_R16_OFFSET,r1 - adds r14 = VMM_VCPU_GP_OFFSET,r13 - ;; - mov r8=ar.ccv - ld8 r14 = [r14] - ;; - mov r1=r14 /* establish kernel global pointer */ - ;; \ - bsw.1 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - mov out0=r13 - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - adds r3=8,r2 // set up second base pointer for SAVE_REST - srlz.i // ensure everybody knows psr.ic is back on - ;; + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; .mem.offset 0,0; st8.spill [r2]=r16,16 .mem.offset 8,0; st8.spill [r3]=r17,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r18,16 .mem.offset 8,0; st8.spill [r3]=r19,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r20,16 .mem.offset 8,0; st8.spill [r3]=r21,16 - mov r18=b6 - ;; + mov r18=b6 + ;; .mem.offset 0,0; st8.spill [r2]=r22,16 .mem.offset 8,0; st8.spill [r3]=r23,16 - mov r19=b7 - ;; + mov r19=b7 + ;; .mem.offset 0,0; st8.spill [r2]=r24,16 .mem.offset 8,0; st8.spill [r3]=r25,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r26,16 .mem.offset 8,0; st8.spill [r3]=r27,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r28,16 .mem.offset 8,0; st8.spill [r3]=r29,16 - ;; + ;; .mem.offset 0,0; st8.spill [r2]=r30,16 .mem.offset 8,0; st8.spill [r3]=r31,32 - ;; - mov ar.fpsr=r11 /* M-unit */ - st8 [r2]=r8,8 /* ar.ccv */ - adds r24=PT(B6)-PT(F7),r3 - ;; - stf.spill [r2]=f6,32 - stf.spill [r3]=f7,32 - ;; - stf.spill [r2]=f8,32 - stf.spill [r3]=f9,32 - ;; - stf.spill [r2]=f10 - stf.spill [r3]=f11 - adds r25=PT(B7)-PT(F11),r3 - ;; - st8 [r24]=r18,16 /* b6 */ - st8 [r25]=r19,16 /* b7 */ - ;; - st8 [r24]=r9 /* ar.csd */ - st8 [r25]=r10 /* ar.ssd */ - ;; - srlz.d // make sure we see the effect of cr.ivr - addl r14=@gprel(ia64_leave_nested),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_ia64_handle_irq - ;; + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; END(kvm_interrupt) .global kvm_dispatch_vexirq @@ -414,387 +420,385 @@ END(kvm_interrupt) ////////////////////////////////////////////////////////////////////// // 0x3400 Entry 13 (size 64 bundles) Reserved ENTRY(kvm_virtual_exirq) - mov r31=pr - mov r19=13 - mov r30 =r0 - ;; + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; kvm_dispatch_vexirq: - cmp.eq p6,p0 = 1,r30 - ;; -(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; -(p6)ld8 r1 = [r29] - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,1,0 - mov out0=r13 - - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - adds r3=8,r2 // set up second base pointer - ;; - KVM_SAVE_REST - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - mov rp=r14 - br.call.sptk.many b6=kvm_vexirq + cmp.eq p6,p0 = 1,r30 + ;; +(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6) ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq END(kvm_virtual_exirq) .org kvm_ia64_ivt+0x3800 ///////////////////////////////////////////////////////////////////// // 0x3800 Entry 14 (size 64 bundles) Reserved - KVM_FAULT(14) - // this code segment is from 2.6.16.13 - + KVM_FAULT(14) + // this code segment is from 2.6.16.13 .org kvm_ia64_ivt+0x3c00 /////////////////////////////////////////////////////////////////////// // 0x3c00 Entry 15 (size 64 bundles) Reserved - KVM_FAULT(15) - + KVM_FAULT(15) .org kvm_ia64_ivt+0x4000 /////////////////////////////////////////////////////////////////////// // 0x4000 Entry 16 (size 64 bundles) Reserved - KVM_FAULT(16) + KVM_FAULT(16) .org kvm_ia64_ivt+0x4400 ////////////////////////////////////////////////////////////////////// // 0x4400 Entry 17 (size 64 bundles) Reserved - KVM_FAULT(17) + KVM_FAULT(17) .org kvm_ia64_ivt+0x4800 ////////////////////////////////////////////////////////////////////// // 0x4800 Entry 18 (size 64 bundles) Reserved - KVM_FAULT(18) + KVM_FAULT(18) .org kvm_ia64_ivt+0x4c00 ////////////////////////////////////////////////////////////////////// // 0x4c00 Entry 19 (size 64 bundles) Reserved - KVM_FAULT(19) + KVM_FAULT(19) .org kvm_ia64_ivt+0x5000 ////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present ENTRY(kvm_page_not_present) - KVM_REFLECT(20) + KVM_REFLECT(20) END(kvm_page_not_present) .org kvm_ia64_ivt+0x5100 /////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission vector ENTRY(kvm_key_permission) - KVM_REFLECT(21) + KVM_REFLECT(21) END(kvm_key_permission) .org kvm_ia64_ivt+0x5200 ////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(kvm_iaccess_rights) - KVM_REFLECT(22) + KVM_REFLECT(22) END(kvm_iaccess_rights) .org kvm_ia64_ivt+0x5300 ////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(kvm_daccess_rights) - KVM_REFLECT(23) + KVM_REFLECT(23) END(kvm_daccess_rights) .org kvm_ia64_ivt+0x5400 ///////////////////////////////////////////////////////////////////// // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) ENTRY(kvm_general_exception) - KVM_REFLECT(24) - KVM_FAULT(24) + KVM_REFLECT(24) + KVM_FAULT(24) END(kvm_general_exception) .org kvm_ia64_ivt+0x5500 ////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(kvm_disabled_fp_reg) - KVM_REFLECT(25) + KVM_REFLECT(25) END(kvm_disabled_fp_reg) .org kvm_ia64_ivt+0x5600 //////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(kvm_nat_consumption) - KVM_REFLECT(26) + KVM_REFLECT(26) END(kvm_nat_consumption) .org kvm_ia64_ivt+0x5700 ///////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(kvm_speculation_vector) - KVM_REFLECT(27) + KVM_REFLECT(27) END(kvm_speculation_vector) .org kvm_ia64_ivt+0x5800 ///////////////////////////////////////////////////////////////////// // 0x5800 Entry 28 (size 16 bundles) Reserved - KVM_FAULT(28) + KVM_FAULT(28) .org kvm_ia64_ivt+0x5900 /////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(kvm_debug_vector) - KVM_FAULT(29) + KVM_FAULT(29) END(kvm_debug_vector) .org kvm_ia64_ivt+0x5a00 /////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(kvm_unaligned_access) - KVM_REFLECT(30) + KVM_REFLECT(30) END(kvm_unaligned_access) .org kvm_ia64_ivt+0x5b00 ////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(kvm_unsupported_data_reference) - KVM_REFLECT(31) + KVM_REFLECT(31) END(kvm_unsupported_data_reference) .org kvm_ia64_ivt+0x5c00 //////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) ENTRY(kvm_floating_point_fault) - KVM_REFLECT(32) + KVM_REFLECT(32) END(kvm_floating_point_fault) .org kvm_ia64_ivt+0x5d00 ///////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(kvm_floating_point_trap) - KVM_REFLECT(33) + KVM_REFLECT(33) END(kvm_floating_point_trap) .org kvm_ia64_ivt+0x5e00 ////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(kvm_lower_privilege_trap) - KVM_REFLECT(34) + KVM_REFLECT(34) END(kvm_lower_privilege_trap) .org kvm_ia64_ivt+0x5f00 ////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(kvm_taken_branch_trap) - KVM_REFLECT(35) + KVM_REFLECT(35) END(kvm_taken_branch_trap) .org kvm_ia64_ivt+0x6000 //////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) ENTRY(kvm_single_step_trap) - KVM_REFLECT(36) + KVM_REFLECT(36) END(kvm_single_step_trap) .global kvm_virtualization_fault_back .org kvm_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(kvm_virtualization_fault) - mov r31=pr - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - st8 [r16] = r1 - adds r17 = VMM_VCPU_GP_OFFSET, r21 - ;; - ld8 r1 = [r17] - cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 - cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 - cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 - cmp.eq p9,p0=EVENT_RSM,r24 - cmp.eq p10,p0=EVENT_SSM,r24 - cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 - cmp.eq p12,p0=EVENT_THASH,r24 - (p6) br.dptk.many kvm_asm_mov_from_ar - (p7) br.dptk.many kvm_asm_mov_from_rr - (p8) br.dptk.many kvm_asm_mov_to_rr - (p9) br.dptk.many kvm_asm_rsm - (p10) br.dptk.many kvm_asm_ssm - (p11) br.dptk.many kvm_asm_mov_to_psr - (p12) br.dptk.many kvm_asm_thash - ;; + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 +(p6) br.dptk.many kvm_asm_mov_from_ar +(p7) br.dptk.many kvm_asm_mov_from_rr +(p8) br.dptk.many kvm_asm_mov_to_rr +(p9) br.dptk.many kvm_asm_rsm +(p10) br.dptk.many kvm_asm_ssm +(p11) br.dptk.many kvm_asm_mov_to_psr +(p12) br.dptk.many kvm_asm_thash + ;; kvm_virtualization_fault_back: - adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 - ;; - ld8 r1 = [r16] - ;; - mov r19=37 - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - cmp.ne p6,p0=EVENT_RFI, r24 - (p6) br.sptk kvm_dispatch_virtualization_fault - ;; - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] - ;; - adds r18=VMM_VPD_VIFS_OFFSET,r18 - ;; - ld8 r18=[r18] - ;; - tbit.z p6,p0=r18,63 - (p6) br.sptk kvm_dispatch_virtualization_fault - ;; - //if vifs.v=1 desert current register frame - alloc r18=ar.pfs,0,0,0,0 - br.sptk kvm_dispatch_virtualization_fault + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 +(p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 +(p6) br.sptk kvm_dispatch_virtualization_fault + ;; +//if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault END(kvm_virtualization_fault) .org kvm_ia64_ivt+0x6200 ////////////////////////////////////////////////////////////// // 0x6200 Entry 38 (size 16 bundles) Reserved - KVM_FAULT(38) + KVM_FAULT(38) .org kvm_ia64_ivt+0x6300 ///////////////////////////////////////////////////////////////// // 0x6300 Entry 39 (size 16 bundles) Reserved - KVM_FAULT(39) + KVM_FAULT(39) .org kvm_ia64_ivt+0x6400 ///////////////////////////////////////////////////////////////// // 0x6400 Entry 40 (size 16 bundles) Reserved - KVM_FAULT(40) + KVM_FAULT(40) .org kvm_ia64_ivt+0x6500 ////////////////////////////////////////////////////////////////// // 0x6500 Entry 41 (size 16 bundles) Reserved - KVM_FAULT(41) + KVM_FAULT(41) .org kvm_ia64_ivt+0x6600 ////////////////////////////////////////////////////////////////// // 0x6600 Entry 42 (size 16 bundles) Reserved - KVM_FAULT(42) + KVM_FAULT(42) .org kvm_ia64_ivt+0x6700 ////////////////////////////////////////////////////////////////// // 0x6700 Entry 43 (size 16 bundles) Reserved - KVM_FAULT(43) + KVM_FAULT(43) .org kvm_ia64_ivt+0x6800 ////////////////////////////////////////////////////////////////// // 0x6800 Entry 44 (size 16 bundles) Reserved - KVM_FAULT(44) + KVM_FAULT(44) .org kvm_ia64_ivt+0x6900 /////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception //(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(kvm_ia32_exception) - KVM_FAULT(45) + KVM_FAULT(45) END(kvm_ia32_exception) .org kvm_ia64_ivt+0x6a00 //////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(kvm_ia32_intercept) - KVM_FAULT(47) + KVM_FAULT(47) END(kvm_ia32_intercept) .org kvm_ia64_ivt+0x6c00 ///////////////////////////////////////////////////////////////////// // 0x6c00 Entry 48 (size 16 bundles) Reserved - KVM_FAULT(48) + KVM_FAULT(48) .org kvm_ia64_ivt+0x6d00 ////////////////////////////////////////////////////////////////////// // 0x6d00 Entry 49 (size 16 bundles) Reserved - KVM_FAULT(49) + KVM_FAULT(49) .org kvm_ia64_ivt+0x6e00 ////////////////////////////////////////////////////////////////////// // 0x6e00 Entry 50 (size 16 bundles) Reserved - KVM_FAULT(50) + KVM_FAULT(50) .org kvm_ia64_ivt+0x6f00 ///////////////////////////////////////////////////////////////////// // 0x6f00 Entry 51 (size 16 bundles) Reserved - KVM_FAULT(52) + KVM_FAULT(52) .org kvm_ia64_ivt+0x7100 //////////////////////////////////////////////////////////////////// // 0x7100 Entry 53 (size 16 bundles) Reserved - KVM_FAULT(53) + KVM_FAULT(53) .org kvm_ia64_ivt+0x7200 ///////////////////////////////////////////////////////////////////// // 0x7200 Entry 54 (size 16 bundles) Reserved - KVM_FAULT(54) + KVM_FAULT(54) .org kvm_ia64_ivt+0x7300 //////////////////////////////////////////////////////////////////// // 0x7300 Entry 55 (size 16 bundles) Reserved - KVM_FAULT(55) + KVM_FAULT(55) .org kvm_ia64_ivt+0x7400 //////////////////////////////////////////////////////////////////// // 0x7400 Entry 56 (size 16 bundles) Reserved - KVM_FAULT(56) + KVM_FAULT(56) .org kvm_ia64_ivt+0x7500 ///////////////////////////////////////////////////////////////////// // 0x7500 Entry 57 (size 16 bundles) Reserved - KVM_FAULT(57) + KVM_FAULT(57) .org kvm_ia64_ivt+0x7600 ///////////////////////////////////////////////////////////////////// // 0x7600 Entry 58 (size 16 bundles) Reserved - KVM_FAULT(58) + KVM_FAULT(58) .org kvm_ia64_ivt+0x7700 //////////////////////////////////////////////////////////////////// // 0x7700 Entry 59 (size 16 bundles) Reserved - KVM_FAULT(59) + KVM_FAULT(59) .org kvm_ia64_ivt+0x7800 //////////////////////////////////////////////////////////////////// // 0x7800 Entry 60 (size 16 bundles) Reserved - KVM_FAULT(60) + KVM_FAULT(60) .org kvm_ia64_ivt+0x7900 ///////////////////////////////////////////////////////////////////// // 0x7900 Entry 61 (size 16 bundles) Reserved - KVM_FAULT(61) + KVM_FAULT(61) .org kvm_ia64_ivt+0x7a00 ///////////////////////////////////////////////////////////////////// // 0x7a00 Entry 62 (size 16 bundles) Reserved - KVM_FAULT(62) + KVM_FAULT(62) .org kvm_ia64_ivt+0x7b00 ///////////////////////////////////////////////////////////////////// // 0x7b00 Entry 63 (size 16 bundles) Reserved - KVM_FAULT(63) + KVM_FAULT(63) .org kvm_ia64_ivt+0x7c00 //////////////////////////////////////////////////////////////////// // 0x7c00 Entry 64 (size 16 bundles) Reserved - KVM_FAULT(64) + KVM_FAULT(64) .org kvm_ia64_ivt+0x7d00 ///////////////////////////////////////////////////////////////////// // 0x7d00 Entry 65 (size 16 bundles) Reserved - KVM_FAULT(65) + KVM_FAULT(65) .org kvm_ia64_ivt+0x7e00 ///////////////////////////////////////////////////////////////////// // 0x7e00 Entry 66 (size 16 bundles) Reserved - KVM_FAULT(66) + KVM_FAULT(66) .org kvm_ia64_ivt+0x7f00 //////////////////////////////////////////////////////////////////// // 0x7f00 Entry 67 (size 16 bundles) Reserved - KVM_FAULT(67) + KVM_FAULT(67) .org kvm_ia64_ivt+0x8000 // There is no particular reason for this code to be here, other than that @@ -804,132 +808,128 @@ END(kvm_ia32_intercept) ENTRY(kvm_dtlb_miss_dispatch) - mov r19 = 2 - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_dtlb_miss_dispatch) ENTRY(kvm_itlb_miss_dispatch) - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,3,0 - mov out0=cr.ifa - mov out1=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out2=16,r12 - br.call.sptk.many b6=kvm_page_fault + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault END(kvm_itlb_miss_dispatch) ENTRY(kvm_dispatch_reflection) - /* - * Input: - * psr.ic: off - * r19: intr type (offset into ivt, see ia64_int.h) - * r31: contains saved predicates (pr) - */ - KVM_SAVE_MIN_WITH_COVER_R19 - alloc r14=ar.pfs,0,0,5,0 - mov out0=cr.ifa - mov out1=cr.isr - mov out2=cr.iim - mov out3=r15 - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - adds out4=16,r12 - br.call.sptk.many b6=reflect_interruption +/* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption END(kvm_dispatch_reflection) ENTRY(kvm_dispatch_virtualization_fault) - adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 - adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 - ;; - st8 [r16] = r24 - st8 [r17] = r25 - ;; - KVM_SAVE_MIN_WITH_COVER_R19 - ;; - alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) - mov out0=r13 //vcpu - adds r3=8,r2 // set up second base pointer - ;; - ssm psr.ic - ;; - srlz.i // guarantee that interruption collection is on - ;; - //(p15) ssm psr.i // restore psr.i - addl r14=@gprel(ia64_leave_hypervisor_prepare),gp - ;; - KVM_SAVE_REST - KVM_SAVE_EXTRA - mov rp=r14 - ;; - adds out1=16,sp //regs - br.call.sptk.many b6=kvm_emulate + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate END(kvm_dispatch_virtualization_fault) ENTRY(kvm_dispatch_interrupt) - KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 - ;; - alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group - //mov out0=cr.ivr // pass cr.ivr as first arg - adds r3=8,r2 // set up second base pointer for SAVE_REST - ;; - ssm psr.ic - ;; - srlz.i - ;; - //(p15) ssm psr.i - addl r14=@gprel(ia64_leave_hypervisor),gp - ;; - KVM_SAVE_REST - mov rp=r14 - ;; - mov out0=r13 // pass pointer to pt_regs as second arg - br.call.sptk.many b6=kvm_ia64_handle_irq + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq END(kvm_dispatch_interrupt) - - - GLOBAL_ENTRY(ia64_leave_nested) rsm psr.i ;; @@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested) ;; ldf.fill f11=[r2] // mov r18=r13 -// mov r21=r13 +// mov r21=r13 adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 ;; @@ -1058,138 +1058,135 @@ GLOBAL_ENTRY(ia64_leave_nested) rfi END(ia64_leave_nested) - - GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) - /* - * work.need_resched etc. mustn't get changed - *by this CPU before it returns to - ;; - * user- or fsys-mode, hence we disable interrupts early on: - */ - adds r2 = PT(R4)+16,r12 - adds r3 = PT(R5)+16,r12 - adds r8 = PT(EML_UNAT)+16,r12 - ;; - ld8 r8 = [r8] - ;; - mov ar.unat=r8 - ;; - ld8.fill r4=[r2],16 //load r4 - ld8.fill r5=[r3],16 //load r5 - ;; - ld8.fill r6=[r2] //load r6 - ld8.fill r7=[r3] //load r7 - ;; +/* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; END(ia64_leave_hypervisor_prepare) //fall through GLOBAL_ENTRY(ia64_leave_hypervisor) - rsm psr.i - ;; - br.call.sptk.many b0=leave_hypervisor_tail - ;; - adds r20=PT(PR)+16,r12 - adds r8=PT(EML_UNAT)+16,r12 - ;; - ld8 r8=[r8] - ;; - mov ar.unat=r8 - ;; - lfetch [r20],PT(CR_IPSR)-PT(PR) - adds r2 = PT(B6)+16,r12 - adds r3 = PT(B7)+16,r12 - ;; - lfetch [r20] - ;; - ld8 r24=[r2],16 /* B6 */ - ld8 r25=[r3],16 /* B7 */ - ;; - ld8 r26=[r2],16 /* ar_csd */ - ld8 r27=[r3],16 /* ar_ssd */ - mov b6 = r24 - ;; - ld8.fill r8=[r2],16 - ld8.fill r9=[r3],16 - mov b7 = r25 - ;; - mov ar.csd = r26 - mov ar.ssd = r27 - ;; - ld8.fill r10=[r2],PT(R15)-PT(R10) - ld8.fill r11=[r3],PT(R14)-PT(R11) - ;; - ld8.fill r15=[r2],PT(R16)-PT(R15) - ld8.fill r14=[r3],PT(R17)-PT(R14) - ;; - ld8.fill r16=[r2],16 - ld8.fill r17=[r3],16 - ;; - ld8.fill r18=[r2],16 - ld8.fill r19=[r3],16 - ;; - ld8.fill r20=[r2],16 - ld8.fill r21=[r3],16 - ;; - ld8.fill r22=[r2],16 - ld8.fill r23=[r3],16 - ;; - ld8.fill r24=[r2],16 - ld8.fill r25=[r3],16 - ;; - ld8.fill r26=[r2],16 - ld8.fill r27=[r3],16 - ;; - ld8.fill r28=[r2],16 - ld8.fill r29=[r3],16 - ;; - ld8.fill r30=[r2],PT(F6)-PT(R30) - ld8.fill r31=[r3],PT(F7)-PT(R31) - ;; - rsm psr.i | psr.ic - // initiate turning off of interrupt and interruption collection - invala // invalidate ALAT - ;; - srlz.i // ensure interruption collection is off - ;; - bsw.0 - ;; - adds r16 = PT(CR_IPSR)+16,r12 - adds r17 = PT(CR_IIP)+16,r12 - mov r21=r13 // get current - ;; - ld8 r31=[r16],16 // load cr.ipsr - ld8 r30=[r17],16 // load cr.iip - ;; - ld8 r29=[r16],16 // load cr.ifs - ld8 r28=[r17],16 // load ar.unat - ;; - ld8 r27=[r16],16 // load ar.pfs - ld8 r26=[r17],16 // load ar.rsc - ;; - ld8 r25=[r16],16 // load ar.rnat - ld8 r24=[r17],16 // load ar.bspstore - ;; - ld8 r23=[r16],16 // load predicates - ld8 r22=[r17],16 // load b0 - ;; - ld8 r20=[r16],16 // load ar.rsc value for "loadrs" - ld8.fill r1=[r17],16 //load r1 - ;; - ld8.fill r12=[r16],16 //load r12 - ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 - ;; - ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr - ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 - ;; - ld8.fill r3=[r16] //load r3 - ld8 r18=[r17] //load ar_ccv - ;; - mov ar.fpsr=r19 - mov ar.ccv=r18 - shr.u r18=r20,16 - ;; + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; kvm_rbs_switch: - mov r19=96 + mov r19=96 kvm_dont_preserve_current_frame: /* @@ -1201,76 +1198,76 @@ kvm_dont_preserve_current_frame: # define pReturn p7 # define Nregs 14 - alloc loc0=ar.pfs,2,Nregs-2,2,0 - shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) - sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize - ;; - mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" - shladd in0=loc1,3,r19 - mov in1=0 - ;; - TEXT_ALIGN(32) + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) kvm_rse_clear_invalid: - alloc loc0=ar.pfs,2,Nregs-2,2,0 - cmp.lt pRecurse,p0=Nregs*8,in0 - // if more than Nregs regs left to clear, (re)curse - add out0=-Nregs*8,in0 - add out1=1,in1 // increment recursion count - mov loc1=0 - mov loc2=0 - ;; - mov loc3=0 - mov loc4=0 - mov loc5=0 - mov loc6=0 - mov loc7=0 + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 (pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid - ;; - mov loc8=0 - mov loc9=0 - cmp.ne pReturn,p0=r0,in1 - // if recursion count != 0, we need to do a br.ret - mov loc10=0 - mov loc11=0 + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 (pReturn) br.ret.dptk.many b0 # undef pRecurse # undef pReturn // loadrs has already been shifted - alloc r16=ar.pfs,0,0,0,0 // drop current register frame - ;; - loadrs - ;; - mov ar.bspstore=r24 - ;; - mov ar.unat=r28 - mov ar.rnat=r25 - mov ar.rsc=r26 - ;; - mov cr.ipsr=r31 - mov cr.iip=r30 - mov cr.ifs=r29 - mov ar.pfs=r27 - adds r18=VMM_VPD_BASE_OFFSET,r21 - ;; - ld8 r18=[r18] //vpd - adds r17=VMM_VCPU_ISR_OFFSET,r21 - ;; - ld8 r17=[r17] - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] //vpsr - mov r25=r18 - adds r16= VMM_VCPU_GP_OFFSET,r21 - ;; - ld8 r16= [r16] // Put gp in r24 - movl r24=@gprel(ia64_vmm_entry) // calculate return address - ;; - add r24=r24,r16 - ;; - br.sptk.many kvm_vps_sync_write // call the service - ;; + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + br.sptk.many kvm_vps_sync_write // call the service + ;; END(ia64_leave_hypervisor) // fall through GLOBAL_ENTRY(ia64_vmm_entry) @@ -1283,16 +1280,14 @@ GLOBAL_ENTRY(ia64_vmm_entry) * r22:b0 * r23:predicate */ - mov r24=r22 - mov r25=r18 - tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic - (p1) br.cond.sptk.few kvm_vps_resume_normal - (p2) br.cond.sptk.many kvm_vps_resume_handler - ;; + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic +(p1) br.cond.sptk.few kvm_vps_resume_normal +(p2) br.cond.sptk.many kvm_vps_resume_handler + ;; END(ia64_vmm_entry) - - /* * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, * u64 arg3, u64 arg4, u64 arg5, @@ -1310,88 +1305,88 @@ psrsave = loc2 entry = loc3 hostret = r24 - alloc pfssave=ar.pfs,4,4,0,0 - mov rpsave=rp - adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 - ;; - ld8 entry=[entry] -1: mov hostret=ip - mov r25=in1 // copy arguments - mov r26=in2 - mov r27=in3 - mov psrsave=psr - ;; - tbit.nz p6,p0=psrsave,14 // IA64_PSR_I - tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC - ;; - add hostret=2f-1b,hostret // calculate return address - add entry=entry,in0 - ;; - rsm psr.i | psr.ic - ;; - srlz.i - mov b6=entry - br.cond.sptk b6 // call the service + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service 2: - // Architectural sequence for enabling interrupts if necessary +// Architectural sequence for enabling interrupts if necessary (p7) ssm psr.ic - ;; + ;; (p7) srlz.i - ;; + ;; //(p6) ssm psr.i - ;; - mov rp=rpsave - mov ar.pfs=pfssave - mov r8=r31 - ;; - srlz.d - br.ret.sptk rp + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp END(ia64_call_vsa) #define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) GLOBAL_ENTRY(vmm_reset_entry) - //set up ipsr, iip, vpd.vpsr, dcr - // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 - // For DCR: all bits 0 - bsw.0 - ;; - mov r21 =r13 - adds r14=-VMM_PT_REGS_SIZE, r12 - ;; - movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 - movl r10=0x8000000000000000 - adds r16=PT(CR_IIP), r14 - adds r20=PT(R1), r14 - ;; - rsm psr.ic | psr.i - ;; - srlz.i - ;; - mov ar.rsc = 0 - ;; - flushrs - ;; - mov ar.bspstore = 0 - // clear BSPSTORE - ;; - mov cr.ipsr=r6 - mov cr.ifs=r10 - ld8 r4 = [r16] // Set init iip for first run. - ld8 r1 = [r20] - ;; - mov cr.iip=r4 - adds r16=VMM_VPD_BASE_OFFSET,r13 - ;; - ld8 r18=[r16] - ;; - adds r19=VMM_VPD_VPSR_OFFSET,r18 - ;; - ld8 r19=[r19] - mov r17=r0 - mov r22=r0 - mov r23=r0 - br.cond.sptk ia64_vmm_entry - br.ret.sptk b0 + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + bsw.0 + ;; + mov r21 =r13 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + adds r16=VMM_VPD_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 END(vmm_reset_entry) diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index e22b93361e08..6b6307a3bd55 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) u64 i, dirty_pages = 1; u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); - void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) - + KVM_MEM_DIRTY_LOG_OFS; + void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; vmm_spin_lock(lock); diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 0c66dbdd1d72..66fd705e82c0 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -227,14 +227,14 @@ finish_up: return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) +static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; int slice; - nasid = cpuid_to_nasid(first_cpu(mask)); - slice = cpuid_to_slice(first_cpu(mask)); + nasid = cpuid_to_nasid(cpumask_first(mask)); + slice = cpuid_to_slice(cpumask_first(mask)); list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 83f190ffe350..ca553b0429ce 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -151,7 +151,8 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) +static void sn_set_msi_irq_affinity(unsigned int irq, + const struct cpumask *cpu_mask) { struct msi_msg msg; int slice; @@ -164,7 +165,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) struct sn_pcibus_provider *provider; unsigned int cpu; - cpu = first_cpu(cpu_mask); + cpu = cpumask_first(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return; @@ -204,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = cpu_mask; + irq_desc[irq].affinity = *cpu_mask; } #endif /* CONFIG_SMP */ diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 29047d5c259a..cabba332cc48 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -10,6 +10,7 @@ config M32R default y select HAVE_IDE select HAVE_OPROFILE + select INIT_ALL_POSSIBLE config SBUS bool diff --git a/arch/m32r/kernel/init_task.c b/arch/m32r/kernel/init_task.c index 0d658dbb6766..016885c6f260 100644 --- a/arch/m32r/kernel/init_task.c +++ b/arch/m32r/kernel/init_task.c @@ -11,7 +11,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 39cb6da72dcb..0f06b3722e96 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -73,17 +73,11 @@ static unsigned int bsp_phys_id = -1; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; -/* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - cpumask_t cpu_bootout_map; cpumask_t cpu_bootin_map; static cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); /* Per CPU bogomips and other parameters */ struct cpuinfo_m32r cpu_data[NR_CPUS] __cacheline_aligned; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 836fb66f080d..c825bde17cb3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -280,7 +280,6 @@ config M68060 config MMU_MOTOROLA bool - depends on MMU && !MMU_SUN3 config MMU_SUN3 bool diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 3042c2bc8c58..632ce016014d 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -40,7 +40,6 @@ * alignment requirements and potentially different initial * setup. */ -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/kernel/init_task.c b/arch/m68knommu/kernel/init_task.c index 344c01aede08..fe282de1d596 100644 --- a/arch/m68knommu/kernel/init_task.c +++ b/arch/m68knommu/kernel/init_task.c @@ -12,7 +12,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/m68knommu/platform/coldfire/pit.c b/arch/m68knommu/platform/coldfire/pit.c index c5b916700b22..2a12e7fa9748 100644 --- a/arch/m68knommu/platform/coldfire/pit.c +++ b/arch/m68knommu/platform/coldfire/pit.c @@ -156,7 +156,7 @@ void hw_timer_init(void) { u32 imr; - cf_pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + cf_pit_clockevent.cpumask = cpumask_of(smp_processor_id()); cf_pit_clockevent.mult = div_sc(FREQ, NSEC_PER_SEC, 32); cf_pit_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFF, &cf_pit_clockevent); diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index a58f0eecc68f..abc62aa744ac 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -49,7 +49,8 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include <linux/cpumask.h> -extern void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity); +extern void plat_set_irq_affinity(unsigned int irq, + const struct cpumask *affinity); extern void smtc_forward_irq(unsigned int irq); /* diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 7785bec732f2..1fb959f98982 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 0ff5b523ea77..86557b5d1b3f 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -38,9 +38,6 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_RESCHEDULE_YOURSELF 0x1 /* XXX braindead */ #define SMP_CALL_FUNCTION 0x2 -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map - extern void asmlinkage smp_bootstrap(void); /* diff --git a/arch/mips/jazz/irq.c b/arch/mips/jazz/irq.c index d7f8a782aae4..03965cb1b252 100644 --- a/arch/mips/jazz/irq.c +++ b/arch/mips/jazz/irq.c @@ -146,7 +146,7 @@ void __init plat_time_init(void) BUG_ON(HZ != 100); - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(JAZZ_TIMER_IRQ, action); diff --git a/arch/mips/kernel/cevt-bcm1480.c b/arch/mips/kernel/cevt-bcm1480.c index 0a57f86945f1..b820661678b0 100644 --- a/arch/mips/kernel/cevt-bcm1480.c +++ b/arch/mips/kernel/cevt-bcm1480.c @@ -126,7 +126,7 @@ void __cpuinit sb1480_clockevent_init(void) cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -148,6 +148,6 @@ void __cpuinit sb1480_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of_cpu(cpu)); + irq_set_affinity(irq, cpumask_of(cpu)); setup_irq(irq, action); } diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c index df4acb68bfb5..1ada45ea0700 100644 --- a/arch/mips/kernel/cevt-ds1287.c +++ b/arch/mips/kernel/cevt-ds1287.c @@ -88,7 +88,6 @@ static void ds1287_event_handler(struct clock_event_device *dev) static struct clock_event_device ds1287_clockevent = { .name = "ds1287", .features = CLOCK_EVT_FEAT_PERIODIC, - .cpumask = CPU_MASK_CPU0, .set_next_event = ds1287_set_next_event, .set_mode = ds1287_set_mode, .event_handler = ds1287_event_handler, @@ -122,6 +121,7 @@ int __init ds1287_clockevent_init(int irq) clockevent_set_clock(cd, 32768); cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + cd->cpumask = cpumask_of(0); clockevents_register_device(&ds1287_clockevent); diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c index 6e2f58520afb..e9b787feedcb 100644 --- a/arch/mips/kernel/cevt-gt641xx.c +++ b/arch/mips/kernel/cevt-gt641xx.c @@ -96,7 +96,6 @@ static void gt641xx_timer0_event_handler(struct clock_event_device *dev) static struct clock_event_device gt641xx_timer0_clockevent = { .name = "gt641xx-timer0", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .cpumask = CPU_MASK_CPU0, .irq = GT641XX_TIMER0_IRQ, .set_next_event = gt641xx_timer0_set_next_event, .set_mode = gt641xx_timer0_set_mode, @@ -132,6 +131,7 @@ static int __init gt641xx_timer0_clockevent_init(void) clockevent_set_clock(cd, gt641xx_base_clock); cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + cd->cpumask = cpumask_of(0); clockevents_register_device(>641xx_timer0_clockevent); diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 4a4c59f2737a..e1ec83b68031 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -195,7 +195,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/arch/mips/kernel/cevt-sb1250.c b/arch/mips/kernel/cevt-sb1250.c index 63ac3ad462bc..a2eebaafda52 100644 --- a/arch/mips/kernel/cevt-sb1250.c +++ b/arch/mips/kernel/cevt-sb1250.c @@ -125,7 +125,7 @@ void __cpuinit sb1250_clockevent_init(void) cd->min_delta_ns = clockevent_delta2ns(2, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = sibyte_next_event; cd->set_mode = sibyte_set_mode; clockevents_register_device(cd); @@ -147,6 +147,6 @@ void __cpuinit sb1250_clockevent_init(void) action->name = name; action->dev_id = cd; - irq_set_affinity(irq, cpumask_of_cpu(cpu)); + irq_set_affinity(irq, cpumask_of(cpu)); setup_irq(irq, action); } diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c index 5162fe4b5952..6d45e24db5bf 100644 --- a/arch/mips/kernel/cevt-smtc.c +++ b/arch/mips/kernel/cevt-smtc.c @@ -292,7 +292,7 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; diff --git a/arch/mips/kernel/cevt-txx9.c b/arch/mips/kernel/cevt-txx9.c index b5fc4eb412d2..eccf7d6096bd 100644 --- a/arch/mips/kernel/cevt-txx9.c +++ b/arch/mips/kernel/cevt-txx9.c @@ -112,7 +112,6 @@ static struct clock_event_device txx9tmr_clock_event_device = { .name = "TXx9", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .rating = 200, - .cpumask = CPU_MASK_CPU0, .set_mode = txx9tmr_set_mode, .set_next_event = txx9tmr_set_next_event, }; @@ -150,6 +149,7 @@ void __init txx9_clockevent_init(unsigned long baseaddr, int irq, clockevent_delta2ns(0xffffffff >> (32 - TXX9_TIMER_BITS), cd); cd->min_delta_ns = clockevent_delta2ns(0xf, cd); cd->irq = irq; + cd->cpumask = cpumask_of(0), clockevents_register_device(cd); setup_irq(irq, &txx9tmr_irq); printk(KERN_INFO "TXx9: clockevent device at 0x%lx, irq %d\n", diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index b6ac55162b9a..f4d187825f96 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c @@ -115,7 +115,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. */ - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevent_set_clock(cd, CLOCK_TICK_RATE); cd->max_delta_ns = clockevent_delta2ns(0x7FFF, cd); cd->min_delta_ns = clockevent_delta2ns(0xF, cd); diff --git a/arch/mips/kernel/init_task.c b/arch/mips/kernel/init_task.c index d72487ad7c15..149cd914526e 100644 --- a/arch/mips/kernel/init_task.c +++ b/arch/mips/kernel/init_task.c @@ -9,7 +9,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index f0a4bb19e096..494a49a317e9 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) +static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -164,7 +164,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) pr_debug(KERN_DEBUG "%s called\n", __func__); irq -= _irqbase; - cpus_and(tmp, cpumask, cpu_online_map); + cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) return; @@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned int irq, cpumask_t cpumask) set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); } - irq_desc[irq].affinity = cpumask; + irq_desc[irq].affinity = *cpumask; spin_unlock_irqrestore(&gic_lock, flags); } diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index ca476c4f62a5..f27beca4b26d 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -51,10 +51,10 @@ static int __init allowcpus(char *str) int len; cpus_clear(cpu_allow_map); - if (cpulist_parse(str, cpu_allow_map) == 0) { + if (cpulist_parse(str, &cpu_allow_map) == 0) { cpu_set(0, cpu_allow_map); cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); - len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map); buf[len] = '\0'; pr_debug("Allowable CPUs: %s\n", buf); return 1; @@ -226,7 +226,7 @@ void __init cmp_smp_setup(void) for (i = 1; i < NR_CPUS; i++) { if (amon_cpu_avail(i)) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++ncpu; __cpu_logical_map[ncpu] = i; } diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 87a1816c1f45..6f7ee5ac46ee 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -70,7 +70,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0, write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - cpu_set(tc, phys_cpu_present_map); + cpu_set(tc, cpu_possible_map); __cpu_number_map[tc] = ++ncpu; __cpu_logical_map[ncpu] = tc; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8bf88faf5afd..3da94704f816 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -44,15 +44,10 @@ #include <asm/mipsmtregs.h> #endif /* CONFIG_MIPS_MT_SMTC */ -cpumask_t phys_cpu_present_map; /* Bitmask of available CPUs */ volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ -cpumask_t cpu_online_map; /* Bitmask of currently online CPUs */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ -EXPORT_SYMBOL(phys_cpu_present_map); -EXPORT_SYMBOL(cpu_online_map); - extern void cpu_idle(void); /* Number of TCs (or siblings in Intel speak) per CPU core */ @@ -195,7 +190,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* preload SMP state for boot cpu */ void __devinit smp_prepare_boot_cpu(void) { - cpu_set(0, phys_cpu_present_map); + cpu_set(0, cpu_possible_map); cpu_set(0, cpu_online_map); cpu_set(0, cpu_callin_map); } diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 897fb2b4751c..b6cca01ff82b 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -290,7 +290,7 @@ static void smtc_configure_tlb(void) * possibly leave some TCs/VPEs as "slave" processors. * * Use c0_MVPConf0 to find out how many TCs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. */ int __init smtc_build_cpu_map(int start_cpu_slot) @@ -304,7 +304,7 @@ int __init smtc_build_cpu_map(int start_cpu_slot) */ ntcs = ((read_c0_mvpconf0() & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1; for (i=start_cpu_slot; i<NR_CPUS && i<ntcs; i++) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } @@ -521,7 +521,7 @@ void smtc_prepare_cpus(int cpus) * Pull any physically present but unused TCs out of circulation. */ while (tc < (((val & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1)) { - cpu_clear(tc, phys_cpu_present_map); + cpu_clear(tc, cpu_possible_map); cpu_clear(tc, cpu_present_map); tc++; } diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index f84a46a8ae6e..aabd7274507b 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -114,9 +114,9 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) +void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { - cpumask_t tmask = affinity; + cpumask_t tmask = *affinity; int cpu = 0; void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); @@ -139,7 +139,7 @@ void plat_set_irq_affinity(unsigned int irq, cpumask_t affinity) * be made to forward to an offline "CPU". */ - for_each_cpu_mask(cpu, affinity) { + for_each_cpu(cpu, affinity) { if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) cpu_clear(cpu, tmask); } diff --git a/arch/mips/nxp/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c index 62f495b57f93..cf293b279098 100644 --- a/arch/mips/nxp/pnx8550/common/time.c +++ b/arch/mips/nxp/pnx8550/common/time.c @@ -102,6 +102,7 @@ __init void plat_time_init(void) unsigned int p; unsigned int pow2p; + pnx8xxx_clockevent.cpumask = cpu_none_mask; clockevents_register_device(&pnx8xxx_clockevent); clocksource_register(&pnx_clocksource); diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index 3a7df647ca77..f78c29b68d77 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -141,7 +141,7 @@ static void __cpuinit yos_boot_secondary(int cpu, struct task_struct *idle) } /* - * Detect available CPUs, populate phys_cpu_present_map before smp_init + * Detect available CPUs, populate cpu_possible_map before smp_init * * We don't want to start the secondary CPU yet nor do we have a nice probing * feature in PMON so we just assume presence of the secondary core. @@ -150,10 +150,10 @@ static void __init yos_smp_setup(void) { int i; - cpus_clear(phys_cpu_present_map); + cpus_clear(cpu_possible_map); for (i = 0; i < 2; i++) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; } diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index ba5cdebeaf0d..5b47d6b65275 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -76,7 +76,7 @@ static int do_cpumask(cnodeid_t cnode, nasid_t nasid, int highest) /* Only let it join in if it's marked enabled */ if ((acpu->cpu_info.flags & KLINFO_ENABLE) && (tot_cpus_found != NR_CPUS)) { - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); alloc_cpupda(cpuid, tot_cpus_found); cpus_found++; tot_cpus_found++; diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 1327c2746fb7..f024057a35f8 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -134,7 +134,7 @@ void __cpuinit hub_rt_clock_event_init(void) cd->min_delta_ns = clockevent_delta2ns(0x300, cd); cd->rating = 200; cd->irq = irq; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = rt_next_event; cd->set_mode = rt_set_mode; clockevents_register_device(cd); diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index a35818ed4263..12b465d404df 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask); +static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) +static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -117,11 +117,11 @@ static void bcm1480_set_affinity(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int irq_dirty; - if (cpus_weight(mask) != 1) { + if (cpumask_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } - i = first_cpu(mask); + i = cpumask_first(mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index bd9eeb43ed0e..dddfda8e8294 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -136,7 +136,7 @@ static void __cpuinit bcm1480_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? * * Common setup before any secondaries are started @@ -145,14 +145,14 @@ static void __init bcm1480_smp_setup(void) { int i, num; - cpus_clear(phys_cpu_present_map); - cpu_set(0, phys_cpu_present_map); + cpus_clear(cpu_possible_map); + cpu_set(0, cpu_possible_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index a5158483986e..808ac2959b8c 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, cpumask_t mask); +static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,16 +103,16 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, cpumask_t mask) +static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; struct irq_desc *desc = irq_desc + irq; unsigned long flags; - i = first_cpu(mask); + i = cpumask_first(mask); - if (cpus_weight(mask) > 1) { + if (cpumask_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); return; } diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index 0734b933e969..5950a288a7da 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -124,7 +124,7 @@ static void __cpuinit sb1250_boot_secondary(int cpu, struct task_struct *idle) /* * Use CFE to find out how many CPUs are available, setting up - * phys_cpu_present_map and the logical/physical mappings. + * cpu_possible_map and the logical/physical mappings. * XXXKW will the boot CPU ever not be physical 0? * * Common setup before any secondaries are started @@ -133,14 +133,14 @@ static void __init sb1250_smp_setup(void) { int i, num; - cpus_clear(phys_cpu_present_map); - cpu_set(0, phys_cpu_present_map); + cpus_clear(cpu_possible_map); + cpu_set(0, cpu_possible_map); __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; for (i = 1, num = 0; i < NR_CPUS; i++) { if (cfe_cpu_stop(i) == 0) { - cpu_set(i, phys_cpu_present_map); + cpu_set(i, cpu_possible_map); __cpu_number_map[i] = ++num; __cpu_logical_map[num] = i; } diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 796e3ce28720..69f5f88711cc 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -80,7 +80,7 @@ static void __init sni_a20r_timer_setup(void) struct irqaction *action = &a20r_irqaction; unsigned int cpu = smp_processor_id(); - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); clockevents_register_device(cd); action->dev_id = cd; setup_irq(SNI_A20R_IRQ_TIMER, &a20r_irqaction); diff --git a/arch/mn10300/kernel/init_task.c b/arch/mn10300/kernel/init_task.c index af16f6e5c918..5ac3566f8c98 100644 --- a/arch/mn10300/kernel/init_task.c +++ b/arch/mn10300/kernel/init_task.c @@ -18,7 +18,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 644a70b1b04e..aacf11d33723 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select HAVE_OPROFILE select RTC_CLASS select RTC_DRV_PARISC + select INIT_ALL_POSSIBLE help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/arch/parisc/kernel/init_task.c b/arch/parisc/kernel/init_task.c index f5941c086551..1e25a45d64c1 100644 --- a/arch/parisc/kernel/init_task.c +++ b/arch/parisc/kernel/init_task.c @@ -34,7 +34,6 @@ #include <asm/pgtable.h> #include <asm/pgalloc.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 23ef950df008..4cea935e2f99 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -131,12 +131,12 @@ int cpu_check_affinity(unsigned int irq, cpumask_t *dest) return 0; } -static void cpu_set_affinity_irq(unsigned int irq, cpumask_t dest) +static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { - if (cpu_check_affinity(irq, &dest)) + if (cpu_check_affinity(irq, dest)) return; - irq_desc[irq].affinity = dest; + irq_desc[irq].affinity = *dest; } #endif diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index d47f3975c9c6..80bc000523fa 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -67,21 +67,6 @@ static volatile int cpu_now_booting __read_mostly = 0; /* track which CPU is boo static int parisc_max_cpus __read_mostly = 1; -/* online cpus are ones that we've managed to bring up completely - * possible cpus are all valid cpu - * present cpus are all detected cpu - * - * On startup we bring up the "possible" cpus. Since we discover - * CPUs later, we add them as hotplug, so the possible cpu mask is - * empty in the beginning. - */ - -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; /* Bitmap of online CPUs */ -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; /* Bitmap of Present CPUs */ - -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); - DEFINE_PER_CPU(spinlock_t, ipi_lock) = SPIN_LOCK_UNLOCKED; enum ipi_message_type { diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h new file mode 100644 index 000000000000..9b198d1b3b2b --- /dev/null +++ b/arch/powerpc/include/asm/disassemble.h @@ -0,0 +1,80 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_PPC_DISASSEMBLE_H__ +#define __ASM_PPC_DISASSEMBLE_H__ + +#include <linux/types.h> + +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +#endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h new file mode 100644 index 000000000000..f49031b632ca --- /dev/null +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -0,0 +1,61 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __ASM_44X_H__ +#define __ASM_44X_H__ + +#include <linux/kvm_host.h> + +#define PPC44x_TLB_SIZE 64 + +/* If the guest is expecting it, this can be as large as we like; we'd just + * need to find some way of advertising it. */ +#define KVM44x_GUEST_TLB_SIZE 64 + +struct kvmppc_44x_shadow_ref { + struct page *page; + u16 gtlb_index; + u8 writeable; + u8 tid; +}; + +struct kvmppc_vcpu_44x { + /* Unmodified copy of the guest's TLB. */ + struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; + + /* References to guest pages in the hardware TLB. */ + struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; + + /* State of the shadow TLB at guest context switch time. */ + struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + + struct kvm_vcpu vcpu; +}; + +static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) +{ + return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); +} + +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); + +#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 34b52b7180cd..c1e436fe7738 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -64,27 +64,58 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct tlbe { +struct kvmppc_44x_tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; u32 word1; u32 word2; }; -struct kvm_arch { +enum kvm_exit_types { + MMIO_EXITS, + DCR_EXITS, + SIGNAL_EXITS, + ITLB_REAL_MISS_EXITS, + ITLB_VIRT_MISS_EXITS, + DTLB_REAL_MISS_EXITS, + DTLB_VIRT_MISS_EXITS, + SYSCALL_EXITS, + ISI_EXITS, + DSI_EXITS, + EMULATED_INST_EXITS, + EMULATED_MTMSRWE_EXITS, + EMULATED_WRTEE_EXITS, + EMULATED_MTSPR_EXITS, + EMULATED_MFSPR_EXITS, + EMULATED_MTMSR_EXITS, + EMULATED_MFMSR_EXITS, + EMULATED_TLBSX_EXITS, + EMULATED_TLBWE_EXITS, + EMULATED_RFI_EXITS, + DEC_EXITS, + EXT_INTR_EXITS, + HALT_WAKEUP, + USR_PR_INST, + FP_UNAVAIL, + DEBUG_EXITS, + TIMEINGUEST, + __NUMBER_OF_KVM_EXIT_TYPES }; -struct kvm_vcpu_arch { - /* Unmodified copy of the guest's TLB. */ - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - /* TLB that's actually used when the guest is running. */ - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; - /* Pages which are referenced in the shadow TLB. */ - struct page *shadow_pages[PPC44x_TLB_SIZE]; +/* allow access to big endian 32bit upper/lower parts and 64bit var */ +struct kvmppc_exit_timing { + union { + u64 tv64; + struct { + u32 tbu, tbl; + } tv32; + }; +}; - /* Track which TLB entries we've modified in the current exit. */ - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; +struct kvm_arch { +}; +struct kvm_vcpu_arch { u32 host_stack; u32 host_pid; u32 host_dbcr0; @@ -94,32 +125,32 @@ struct kvm_vcpu_arch { u32 host_msr; u64 fpr[32]; - u32 gpr[32]; + ulong gpr[32]; - u32 pc; + ulong pc; u32 cr; - u32 ctr; - u32 lr; - u32 xer; + ulong ctr; + ulong lr; + ulong xer; - u32 msr; + ulong msr; u32 mmucr; - u32 sprg0; - u32 sprg1; - u32 sprg2; - u32 sprg3; - u32 sprg4; - u32 sprg5; - u32 sprg6; - u32 sprg7; - u32 srr0; - u32 srr1; - u32 csrr0; - u32 csrr1; - u32 dsrr0; - u32 dsrr1; - u32 dear; - u32 esr; + ulong sprg0; + ulong sprg1; + ulong sprg2; + ulong sprg3; + ulong sprg4; + ulong sprg5; + ulong sprg6; + ulong sprg7; + ulong srr0; + ulong srr1; + ulong csrr0; + ulong csrr1; + ulong dsrr0; + ulong dsrr1; + ulong dear; + ulong esr; u32 dec; u32 decar; u32 tbl; @@ -127,7 +158,7 @@ struct kvm_vcpu_arch { u32 tcr; u32 tsr; u32 ivor[16]; - u32 ivpr; + ulong ivpr; u32 pir; u32 shadow_pid; @@ -140,9 +171,22 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; +#ifdef CONFIG_KVM_EXIT_TIMING + struct kvmppc_exit_timing timing_exit; + struct kvmppc_exit_timing timing_last_enter; + u32 last_exit_type; + u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_last_exit; + struct dentry *debugfs_exit_timing; +#endif + u32 last_inst; - u32 fault_dear; - u32 fault_esr; + ulong fault_dear; + ulong fault_esr; gpa_t paddr_accessed; u8 io_gpr; /* GPR used as IO source/target */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bb62ad876de3..36d2a50a8487 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -29,11 +29,6 @@ #include <linux/kvm_types.h> #include <linux/kvm_host.h> -struct kvm_tlb { - struct tlbe guest_tlb[PPC44x_TLB_SIZE]; - struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; -}; - enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ @@ -41,9 +36,6 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ }; -extern const unsigned char exception_priority[]; -extern const unsigned char priority_exception[]; - extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern char kvmppc_handlers_start[]; extern unsigned long kvmppc_handler_len; @@ -58,51 +50,44 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, - u64 asid, u32 flags); -extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + u64 asid, u32 flags, u32 max_bytes, + unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); -/* XXX Book E specific */ -extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); - -extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); - -static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - set_bit(priority, &vcpu->arch.pending_exceptions); -} - -static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception) -{ - unsigned int priority = exception_priority[exception]; - clear_bit(priority, &vcpu->arch.pending_exceptions); -} - -/* Helper function for "full" MSR writes. No need to call this if only EE is - * changing. */ -static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) -{ - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) - kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - - vcpu->arch.msr = new_msr; - - if (vcpu->arch.msr & MSR_WE) - kvm_vcpu_block(vcpu); -} - -static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - if (vcpu->arch.pid != new_pid) { - vcpu->arch.pid = new_pid; - vcpu->arch.swap_pid = 1; - } -} +/* Core-specific hooks */ + +extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, + unsigned int id); +extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu); +extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_processor_compat(void); +extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr); + +extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu); +extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu); + +extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); +extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq); + +extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int op, int *advance); +extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs); +extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt); + +extern int kvmppc_booke_init(void); +extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 8a97cfb08b7e..27cc6fdcd3b7 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -56,6 +56,7 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; +extern unsigned int tlb_44x_index; typedef struct { unsigned int id; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f97999..373fca394a54 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d2146b..9937fe44555f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -23,9 +23,6 @@ #include <linux/mm.h> #include <linux/suspend.h> #include <linux/hrtimer.h> -#ifdef CONFIG_KVM -#include <linux/kvm_host.h> -#endif #ifdef CONFIG_PPC64 #include <linux/time.h> #include <linux/hardirq.h> @@ -51,6 +48,9 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iseries/alpaca.h> #endif +#ifdef CONFIG_KVM +#include <asm/kvm_44x.h> +#endif #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) #include "head_booke.h" @@ -357,12 +357,10 @@ int main(void) DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM - DEFINE(TLBE_BYTES, sizeof(struct tlbe)); + DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); - DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); - DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); @@ -385,5 +383,16 @@ int main(void) DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_KVM_EXIT_TIMING + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbl)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c index 4c85b8d56478..688b329800bd 100644 --- a/arch/powerpc/kernel/init_task.c +++ b/arch/powerpc/kernel/init_task.c @@ -7,7 +7,6 @@ #include <linux/mqueue.h> #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac222d0ab12e..23b8b5e36f98 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -237,7 +237,7 @@ void fixup_irqs(cpumask_t map) mask = map; } if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + irq_desc[irq].chip->set_affinity(irq, &mask); else if (irq_desc[irq].action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 51b201ddf9a1..fb7049c054c0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -33,6 +33,7 @@ #include <linux/mqueue.h> #include <linux/hardirq.h> #include <linux/utsname.h> +#include <linux/kernel_stat.h> #include <asm/pgtable.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ac3f721d235..65484b2200b3 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -59,13 +59,9 @@ struct thread_info *secondary_ti; -cpumask_t cpu_possible_map = CPU_MASK_NONE; -cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e1f3a5140429..c9564031a2a9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk) delta += sys_time; get_paca()->system_time = 0; } - account_system_time(tsk, 0, delta); - account_system_time_scaled(tsk, deltascaled); + if (in_irq() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta, deltascaled); + else + account_idle_time(delta); per_cpu(cputime_last_delta, smp_processor_id()) = delta; per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); @@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick) utime = get_paca()->user_time; get_paca()->user_time = 0; - account_user_time(tsk, utime); - utimescaled = cputime_to_scaled(utime); - account_user_time_scaled(tsk, utimescaled); + account_user_time(tsk, utime, utimescaled); } /* @@ -338,8 +338,12 @@ void calculate_steal_time(void) tb = mftb(); purr = mfspr(SPRN_PURR); stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) - account_steal_time(current, stolen); + if (stolen > 0) { + if (idle_task(smp_processor_id()) != current) + account_steal_time(stolen); + else + account_idle_time(stolen); + } pme->tb = tb; pme->purr = purr; } @@ -844,7 +848,7 @@ static void register_decrementer_clockevent(int cpu) struct clock_event_device *dec = &per_cpu(decrementers, cpu).event; *dec = decrementer_clockevent; - dec->cpumask = cpumask_of_cpu(cpu); + dec->cpumask = cpumask_of(cpu); printk(KERN_DEBUG "clockevent: %s mult[%lx] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c new file mode 100644 index 000000000000..a66bec57265a --- /dev/null +++ b/arch/powerpc/kvm/44x.c @@ -0,0 +1,228 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/err.h> + +#include <asm/reg.h> +#include <asm/cputable.h> +#include <asm/tlbflush.h> +#include <asm/kvm_44x.h> +#include <asm/kvm_ppc.h> + +#include "44x_tlb.h" + +/* Note: clearing MSR[DE] just means that the debug interrupt will not be + * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. + * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt + * will be delivered as an "imprecise debug event" (which is indicated by + * DBSR[IDE]. + */ +static void kvm44x_disable_debug_interrupts(void) +{ + mtmsr(mfmsr() & ~MSR_DE); +} + +void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu) +{ + kvm44x_disable_debug_interrupts(); + + mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); + mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); + mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); + mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); + mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); + mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); + mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); + mtmsr(vcpu->arch.host_msr); +} + +void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) +{ + struct kvm_guest_debug *dbg = &vcpu->guest_debug; + u32 dbcr0 = 0; + + vcpu->arch.host_msr = mfmsr(); + kvm44x_disable_debug_interrupts(); + + /* Save host debug register state. */ + vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); + vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); + vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); + vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); + vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); + vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); + vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); + + /* set registers up for guest */ + + if (dbg->bp[0]) { + mtspr(SPRN_IAC1, dbg->bp[0]); + dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; + } + if (dbg->bp[1]) { + mtspr(SPRN_IAC2, dbg->bp[1]); + dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; + } + if (dbg->bp[2]) { + mtspr(SPRN_IAC3, dbg->bp[2]); + dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; + } + if (dbg->bp[3]) { + mtspr(SPRN_IAC4, dbg->bp[3]); + dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; + } + + mtspr(SPRN_DBCR0, dbcr0); + mtspr(SPRN_DBCR1, 0); + mtspr(SPRN_DBCR2, 0); +} + +void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + kvmppc_44x_tlb_load(vcpu); +} + +void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvmppc_44x_tlb_put(vcpu); +} + +int kvmppc_core_check_processor_compat(void) +{ + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + return r; +} + +int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; + int i; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) + vcpu_44x->shadow_refs[i].gtlb_index = -1; + + return 0; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + gtlbe = &vcpu_44x->guest_tlb[index]; + + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} + +struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvmppc_vcpu_44x *vcpu_44x; + struct kvm_vcpu *vcpu; + int err; + + vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu_44x) { + err = -ENOMEM; + goto out; + } + + vcpu = &vcpu_44x->vcpu; + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +out: + return ERR_PTR(err); +} + +void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu_44x); +} + +static int kvmppc_44x_init(void) +{ + int r; + + r = kvmppc_booke_init(); + if (r) + return r; + + return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE); +} + +static void kvmppc_44x_exit(void) +{ + kvmppc_booke_exit(); +} + +module_init(kvmppc_44x_init); +module_exit(kvmppc_44x_exit); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c new file mode 100644 index 000000000000..82489a743a6f --- /dev/null +++ b/arch/powerpc/kvm/44x_emulate.c @@ -0,0 +1,371 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#include <asm/kvm_ppc.h> +#include <asm/dcr.h> +#include <asm/dcr-regs.h> +#include <asm/disassemble.h> +#include <asm/kvm_44x.h> +#include "timing.h" + +#include "booke.h" +#include "44x_tlb.h" + +#define OP_RFI 19 + +#define XOP_RFI 50 +#define XOP_MFMSR 83 +#define XOP_WRTEE 131 +#define XOP_MTMSR 146 +#define XOP_WRTEEI 163 +#define XOP_MFDCR 323 +#define XOP_MTDCR 451 +#define XOP_TLBSX 914 +#define XOP_ICCCI 966 +#define XOP_TLBWE 978 + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + int dcrn; + int ra; + int rb; + int rc; + int rs; + int rt; + int ws; + + switch (get_op(inst)) { + case OP_RFI: + switch (get_xop(inst)) { + case XOP_RFI: + kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); + *advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case XOP_MFMSR: + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); + break; + + case XOP_MTMSR: + rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case XOP_WRTEE: + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_WRTEEI: + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); + break; + + case XOP_MFDCR: + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_MTDCR: + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + emulated = EMULATE_DO_DCR; + } + + break; + + case XOP_TLBWE: + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); + break; + + case XOP_TLBSX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); + break; + + case XOP_ICCCI: + break; + + default: + emulated = EMULATE_FAIL; + } + + break; + + default: + emulated = EMULATE_FAIL; + } + + return emulated; +} + +int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + switch (sprn) { + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR0: + vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR1: + vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR2: + vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR3: + vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR4: + vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR5: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR6: + vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR7: + vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR8: + vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR9: + vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR10: + vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR11: + vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR12: + vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR13: + vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR14: + vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; + break; + case SPRN_IVOR15: + vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + return EMULATE_DONE; +} + +int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + switch (sprn) { + /* 440 */ + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; + + /* Book E */ + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; + break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; + break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; + break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; + break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; + break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; + break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; + break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; + break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; + break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; + break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; + break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; + break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; + break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; + break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; + break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; + break; + + default: + return EMULATE_FAIL; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + return EMULATE_DONE; +} + diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ad72c6f9811f..9a34b8edb9e2 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -22,20 +22,103 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/highmem.h> + +#include <asm/tlbflush.h> #include <asm/mmu-44x.h> #include <asm/kvm_ppc.h> +#include <asm/kvm_44x.h> +#include "timing.h" #include "44x_tlb.h" +#ifndef PPC44x_TLBE_SIZE +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#endif + +#define PAGE_SIZE_4K (1<<12) +#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) + +#define PPC44x_TLB_UATTR_MASK \ + (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) #define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) #define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) -static unsigned int kvmppc_tlb_44x_pos; +#ifdef DEBUG +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct kvmppc_44x_tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + tlbe = &vcpu_44x->guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} +#endif + +static inline void kvmppc_44x_tlbie(unsigned int index) +{ + /* 0 <= index < 64, so the V bit is clear and we can use the index as + * word0. */ + asm volatile( + "tlbwe %[index], %[index], 0\n" + : + : [index] "r"(index) + ); +} + +static inline void kvmppc_44x_tlbre(unsigned int index, + struct kvmppc_44x_tlbe *tlbe) +{ + asm volatile( + "tlbre %[word0], %[index], 0\n" + "mfspr %[tid], %[sprn_mmucr]\n" + "andi. %[tid], %[tid], 0xff\n" + "tlbre %[word1], %[index], 1\n" + "tlbre %[word2], %[index], 2\n" + : [word0] "=r"(tlbe->word0), + [word1] "=r"(tlbe->word1), + [word2] "=r"(tlbe->word2), + [tid] "=r"(tlbe->tid) + : [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + : "cc" + ); +} + +static inline void kvmppc_44x_tlbwe(unsigned int index, + struct kvmppc_44x_tlbe *stlbe) +{ + unsigned long tmp; + + asm volatile( + "mfspr %[tmp], %[sprn_mmucr]\n" + "rlwimi %[tmp], %[tid], 0, 0xff\n" + "mtspr %[sprn_mmucr], %[tmp]\n" + "tlbwe %[word0], %[index], 0\n" + "tlbwe %[word1], %[index], 1\n" + "tlbwe %[word2], %[index], 2\n" + : [tmp] "=&r"(tmp) + : [word0] "r"(stlbe->word0), + [word1] "r"(stlbe->word1), + [word2] "r"(stlbe->word2), + [tid] "r"(stlbe->tid), + [index] "r"(index), + [sprn_mmucr] "i"(SPRN_MMUCR) + ); +} static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) { - /* Mask off reserved bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; + /* We only care about the guest's permission and user bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; if (!usermode) { /* Guest is in supervisor mode, so we need to translate guest @@ -47,18 +130,60 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) /* Make sure host can always access this memory. */ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + /* WIMGE = 0b00100 */ + attrib |= PPC44x_TLB_M; + return attrib; } +/* Load shadow TLB back into hardware. */ +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbwe(i, stlbe); + } +} + +static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int i) +{ + vcpu_44x->shadow_tlb_mod[i] = 1; +} + +/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i <= tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; + + if (vcpu_44x->shadow_tlb_mod[i]) + kvmppc_44x_tlbre(i, stlbe); + + if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) + kvmppc_44x_tlbie(i); + } +} + + /* Search the guest TLB for a matching entry. */ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; /* XXX Replace loop with fancy data structures. */ - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { + struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; unsigned int tid; if (eaddr < get_tlb_eaddr(tlbe)) @@ -83,78 +208,89 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, return -1; } -struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_IS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { unsigned int as = !!(vcpu->arch.msr & MSR_DS); - unsigned int index; - index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); - if (index == -1) - return NULL; - return &vcpu->arch.guest_tlb[index]; + return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } -static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) +static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int stlb_index) { - return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); -} + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; -static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, - unsigned int index) -{ - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; - struct page *page = vcpu->arch.shadow_pages[index]; + if (!ref->page) + return; - if (get_tlb_v(stlbe)) { - if (kvmppc_44x_tlbe_is_writable(stlbe)) - kvm_release_page_dirty(page); - else - kvm_release_page_clean(page); - } + /* Discard from the TLB. */ + /* Note: we could actually invalidate a host mapping, if the host overwrote + * this TLB entry since we inserted a guest mapping. */ + kvmppc_44x_tlbie(stlb_index); + + /* Now release the page. */ + if (ref->writeable) + kvm_release_page_dirty(ref->page); + else + kvm_release_page_clean(ref->page); + + ref->page = NULL; + + /* XXX set tlb_44x_index to stlb_index? */ + + KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler); } void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu, i); -} - -void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) -{ - vcpu->arch.shadow_tlb_mod[i] = 1; + kvmppc_44x_shadow_release(vcpu_44x, i); } -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, - u32 flags) +/** + * kvmppc_mmu_map -- create a host mapping for guest memory + * + * If the guest wanted a larger page than the host supports, only the first + * host page is mapped here and the rest are demand faulted. + * + * If the guest wanted a smaller page than the host page size, we map only the + * guest-size page (i.e. not a full host page mapping). + * + * Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. + */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, + u32 flags, u32 max_bytes, unsigned int gtlb_index) { + struct kvmppc_44x_tlbe stlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_shadow_ref *ref; struct page *new_page; - struct tlbe *stlbe; hpa_t hpaddr; + gfn_t gfn; unsigned int victim; - /* Future optimization: don't overwrite the TLB entry containing the - * current PC (or stack?). */ - victim = kvmppc_tlb_44x_pos++; - if (kvmppc_tlb_44x_pos > tlb_44x_hwater) - kvmppc_tlb_44x_pos = 0; - stlbe = &vcpu->arch.shadow_tlb[victim]; + /* Select TLB entry to clobber. Indirectly guard against races with the TLB + * miss handler by disabling interrupts. */ + local_irq_disable(); + victim = ++tlb_44x_index; + if (victim > tlb_44x_hwater) + victim = 0; + tlb_44x_index = victim; + local_irq_enable(); /* Get reference to new page. */ + gfn = gpaddr >> PAGE_SHIFT; new_page = gfn_to_page(vcpu->kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); @@ -163,10 +299,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, } hpaddr = page_to_phys(new_page); - /* Drop reference to old page. */ - kvmppc_44x_shadow_release(vcpu, victim); - - vcpu->arch.shadow_pages[victim] = new_page; + /* Invalidate any previous shadow mappings. */ + kvmppc_44x_shadow_release(vcpu_44x, victim); /* XXX Make sure (va, size) doesn't overlap any other * entries. 440x6 user manual says the result would be @@ -174,78 +308,193 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, /* XXX what about AS? */ - stlbe->tid = !(asid & 0xff); - /* Force TS=1 for all guest mappings. */ - /* For now we hardcode 4KB mappings, but it will be important to - * use host large pages in the future. */ - stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS - | PPC44x_TLB_4K; - stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); - kvmppc_tlbe_set_modified(vcpu, victim); + stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; + + if (max_bytes >= PAGE_SIZE) { + /* Guest mapping is larger than or equal to host page size. We can use + * a "native" host mapping. */ + stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; + } else { + /* Guest mapping is smaller than host page size. We must restrict the + * size of the mapping to be at most the smaller of the two, but for + * simplicity we fall back to a 4K mapping (this is probably what the + * guest is using anyways). */ + stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; + + /* 'hpaddr' is a host page, which is larger than the mapping we're + * inserting here. To compensate, we must add the in-page offset to the + * sub-page. */ + hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); + } - KVMTRACE_5D(STLB_WRITE, vcpu, victim, - stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, - handler); + stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.msr & MSR_PR); + stlbe.tid = !(asid & 0xff); + + /* Keep track of the reference so we can properly release it later. */ + ref = &vcpu_44x->shadow_refs[victim]; + ref->page = new_page; + ref->gtlb_index = gtlb_index; + ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); + ref->tid = stlbe.tid; + + /* Insert shadow mapping into hardware TLB. */ + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); + kvmppc_44x_tlbwe(victim, &stlbe); + KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, + stlbe.word2, handler); } -void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, - gva_t eend, u32 asid) +/* For a particular guest TLB entry, invalidate the corresponding host TLB + * mappings and release the host pages. */ +static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, + unsigned int gtlb_index) { - unsigned int pid = !(asid & 0xff); + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); int i; - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - unsigned int tid; + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + if (ref->gtlb_index == gtlb_index) + kvmppc_44x_shadow_release(vcpu_44x, i); + } +} - if (!get_tlb_v(stlbe)) - continue; +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + vcpu->arch.shadow_pid = !usermode; +} - if (eend < get_tlb_eaddr(stlbe)) - continue; +void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; - if (eaddr > get_tlb_end(stlbe)) - continue; + if (unlikely(vcpu->arch.pid == new_pid)) + return; - tid = get_tlb_tid(stlbe); - if (tid && (tid != pid)) - continue; + vcpu->arch.pid = new_pid; - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); + /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it + * can't access guest kernel mappings (TID=1). When we switch to a new + * guest PID, which will also use host PID=0, we must discard the old guest + * userspace mappings. */ + for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { + struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; + + if (ref->tid == 0) + kvmppc_44x_shadow_release(vcpu_44x, i); } } -/* Invalidate all mappings on the privilege switch after PID has been changed. - * The guest always runs with PID=1, so we must clear the entire TLB when - * switching address spaces. */ -void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct kvmppc_44x_tlbe *tlbe) { - int i; + gpa_t gpa; - if (vcpu->arch.swap_pid) { - /* XXX Replace loop with fancy data structures. */ - for (i = 0; i <= tlb_44x_hwater; i++) { - struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; - - /* Future optimization: clear only userspace mappings. */ - kvmppc_44x_shadow_release(vcpu, i); - stlbe->word0 = 0; - kvmppc_tlbe_set_modified(vcpu, i); - KVMTRACE_5D(STLB_INVAL, vcpu, i, - stlbe->tid, stlbe->word0, stlbe->word1, - stlbe->word2, handler); - } - vcpu->arch.swap_pid = 0; + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *tlbe; + unsigned int gtlb_index; + + gtlb_index = vcpu->arch.gpr[ra]; + if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { + printk("%s: index %d\n", __func__, gtlb_index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; } - vcpu->arch.shadow_pid = !usermode; + tlbe = &vcpu_44x->guest_tlb[gtlb_index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ + if (tlbe->word0 & PPC44x_TLB_VALID) + kvmppc_44x_invalidate(vcpu, gtlb_index); + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = get_mmucr_stid(vcpu); + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + u64 asid; + gva_t eaddr; + gpa_t gpaddr; + u32 flags; + u32 bytes; + + eaddr = get_tlb_eaddr(tlbe); + gpaddr = get_tlb_raddr(tlbe); + + /* Use the advertised page size to mask effective and real addrs. */ + bytes = get_tlb_bytes(tlbe); + eaddr &= ~(bytes - 1); + gpaddr &= ~(bytes - 1); + + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index); + } + + KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0, + tlbe->word1, tlbe->word2, handler); + + kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); + return EMULATE_DONE; +} + +int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) +{ + u32 ea; + int gtlb_index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (gtlb_index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = gtlb_index; + + kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); + return EMULATE_DONE; } diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h index 2ccd46b6f6b7..772191f29e62 100644 --- a/arch/powerpc/kvm/44x_tlb.h +++ b/arch/powerpc/kvm/44x_tlb.h @@ -25,48 +25,52 @@ extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, unsigned int as); -extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); -extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); +extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr); + +extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, + u8 rc); +extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); /* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 4) & 0xf; } -static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->word0 & 0xfffffc00; } -static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) +static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) { unsigned int pgsize = get_tlb_size(tlbe); return 1 << 10 << (pgsize << 1); } -static inline gva_t get_tlb_end(const struct tlbe *tlbe) +static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) { return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; } -static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) { u64 word1 = tlbe->word1; return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); } -static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) { return tlbe->tid & 0xff; } -static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 8) & 0x1; } -static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) { return (tlbe->word0 >> 9) & 0x1; } @@ -81,7 +85,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) return (vcpu->arch.mmucr >> 16) & 0x1; } -static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) +static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr) { unsigned int pgmask = get_tlb_bytes(tlbe) - 1; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 53aaa66b25e5..6dbdc4817d80 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -15,27 +15,33 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - bool "Kernel-based Virtual Machine (KVM) support" - depends on 44x && EXPERIMENTAL + bool select PREEMPT_NOTIFIERS select ANON_INODES - # We can only run on Book E hosts so far - select KVM_BOOKE_HOST + +config KVM_440 + bool "KVM support for PowerPC 440 processors" + depends on EXPERIMENTAL && 44x + select KVM ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support running unmodified 440 guest kernels in virtual machines on + 440 host processors. This module provides access to the hardware capabilities through a character device node named /dev/kvm. If unsure, say N. -config KVM_BOOKE_HOST - bool "KVM host support for Book E PowerPC processors" - depends on KVM && 44x +config KVM_EXIT_TIMING + bool "Detailed exit timing" + depends on KVM ---help--- - Provides host support for KVM on Book E PowerPC processors. Currently - this works on 440 processors only. + Calculate elapsed time for every exit/enter cycle. A per-vcpu + report is available in debugfs kvm/vm#_vcpu#_timing. + The overhead is relatively small, however it is not recommended for + production environments. + + If unsure, say N. config KVM_TRACE bool "KVM trace support" diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 2a5d4397ac4b..df7ba59e6d53 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -8,10 +8,16 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) -kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o +kvm-objs := $(common-objs-y) powerpc.o emulate.o +obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM) += kvm.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o -obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o +kvm-440-objs := \ + booke.o \ + booke_interrupts.o \ + 44x.o \ + 44x_tlb.o \ + 44x_emulate.o +obj-$(CONFIG_KVM_440) += kvm-440.o diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke.c index 7b2591e26bae..35485dd6927e 100644 --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke.c @@ -24,21 +24,26 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> + #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> +#include "timing.h" +#include <asm/cacheflush.h> +#include <asm/kvm_44x.h> +#include "booke.h" #include "44x_tlb.h" +unsigned long kvmppc_booke_handlers; + #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, { "mmio", VCPU_STAT(mmio_exits) }, { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, - { "light", VCPU_STAT(light_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, @@ -53,103 +58,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static const u32 interrupt_msr_mask[16] = { - [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, - [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, - [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, - [BOOKE_INTERRUPT_DEBUG] = MSR_ME, -}; - -const unsigned char exception_priority[] = { - [BOOKE_INTERRUPT_DATA_STORAGE] = 0, - [BOOKE_INTERRUPT_INST_STORAGE] = 1, - [BOOKE_INTERRUPT_ALIGNMENT] = 2, - [BOOKE_INTERRUPT_PROGRAM] = 3, - [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, - [BOOKE_INTERRUPT_SYSCALL] = 5, - [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, - [BOOKE_INTERRUPT_DTLB_MISS] = 7, - [BOOKE_INTERRUPT_ITLB_MISS] = 8, - [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, - [BOOKE_INTERRUPT_DEBUG] = 10, - [BOOKE_INTERRUPT_CRITICAL] = 11, - [BOOKE_INTERRUPT_WATCHDOG] = 12, - [BOOKE_INTERRUPT_EXTERNAL] = 13, - [BOOKE_INTERRUPT_FIT] = 14, - [BOOKE_INTERRUPT_DECREMENTER] = 15, -}; - -const unsigned char priority_exception[] = { - BOOKE_INTERRUPT_DATA_STORAGE, - BOOKE_INTERRUPT_INST_STORAGE, - BOOKE_INTERRUPT_ALIGNMENT, - BOOKE_INTERRUPT_PROGRAM, - BOOKE_INTERRUPT_FP_UNAVAIL, - BOOKE_INTERRUPT_SYSCALL, - BOOKE_INTERRUPT_AP_UNAVAIL, - BOOKE_INTERRUPT_DTLB_MISS, - BOOKE_INTERRUPT_ITLB_MISS, - BOOKE_INTERRUPT_MACHINE_CHECK, - BOOKE_INTERRUPT_DEBUG, - BOOKE_INTERRUPT_CRITICAL, - BOOKE_INTERRUPT_WATCHDOG, - BOOKE_INTERRUPT_EXTERNAL, - BOOKE_INTERRUPT_FIT, - BOOKE_INTERRUPT_DECREMENTER, -}; - - -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } - - for (i = 0; i < PPC44x_TLB_SIZE; i++) { - tlbe = &vcpu->arch.shadow_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" S%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} - /* TODO: use vcpu_printf() */ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); - printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); for (i = 0; i < 32; i += 4) { - printk("gpr%02d: %08x %08x %08x %08x\n", i, + printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, vcpu->arch.gpr[i], vcpu->arch.gpr[i+1], vcpu->arch.gpr[i+2], @@ -157,69 +78,96 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) } } -/* Check if we are ready to deliver the interrupt */ -static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, + unsigned int priority) { - int r; + set_bit(priority, &vcpu->arch.pending_exceptions); +} - switch (interrupt) { - case BOOKE_INTERRUPT_CRITICAL: - r = vcpu->arch.msr & MSR_CE; - break; - case BOOKE_INTERRUPT_MACHINE_CHECK: - r = vcpu->arch.msr & MSR_ME; - break; - case BOOKE_INTERRUPT_EXTERNAL: - r = vcpu->arch.msr & MSR_EE; +void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); +} + +void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); +} + +int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) +{ + return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); +} + +void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); +} + +/* Deliver the interrupt of the corresponding priority, if possible. */ +static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, + unsigned int priority) +{ + int allowed = 0; + ulong msr_mask; + + switch (priority) { + case BOOKE_IRQPRIO_PROGRAM: + case BOOKE_IRQPRIO_DTLB_MISS: + case BOOKE_IRQPRIO_ITLB_MISS: + case BOOKE_IRQPRIO_SYSCALL: + case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_INST_STORAGE: + case BOOKE_IRQPRIO_FP_UNAVAIL: + case BOOKE_IRQPRIO_AP_UNAVAIL: + case BOOKE_IRQPRIO_ALIGNMENT: + allowed = 1; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DECREMENTER: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_CRITICAL: + case BOOKE_IRQPRIO_WATCHDOG: + allowed = vcpu->arch.msr & MSR_CE; + msr_mask = MSR_ME; break; - case BOOKE_INTERRUPT_FIT: - r = vcpu->arch.msr & MSR_EE; + case BOOKE_IRQPRIO_MACHINE_CHECK: + allowed = vcpu->arch.msr & MSR_ME; + msr_mask = 0; break; - case BOOKE_INTERRUPT_WATCHDOG: - r = vcpu->arch.msr & MSR_CE; + case BOOKE_IRQPRIO_EXTERNAL: + case BOOKE_IRQPRIO_DECREMENTER: + case BOOKE_IRQPRIO_FIT: + allowed = vcpu->arch.msr & MSR_EE; + msr_mask = MSR_CE|MSR_ME|MSR_DE; break; - case BOOKE_INTERRUPT_DEBUG: - r = vcpu->arch.msr & MSR_DE; + case BOOKE_IRQPRIO_DEBUG: + allowed = vcpu->arch.msr & MSR_DE; + msr_mask = MSR_ME; break; - default: - r = 1; } - return r; -} + if (allowed) { + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); -static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) -{ - switch (interrupt) { - case BOOKE_INTERRUPT_DECREMENTER: - vcpu->arch.tsr |= TSR_DIS; - break; + clear_bit(priority, &vcpu->arch.pending_exceptions); } - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; - vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; - kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); + return allowed; } /* Check pending exceptions and deliver one, if possible. */ -void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) +void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; - unsigned int exception; unsigned int priority; - priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); + priority = __ffs(*pending); while (priority <= BOOKE_MAX_INTERRUPT) { - exception = priority_exception[priority]; - if (kvmppc_can_deliver_interrupt(vcpu, exception)) { - kvmppc_clear_exception(vcpu, exception); - kvmppc_deliver_interrupt(vcpu, exception); + if (kvmppc_booke_irqprio_deliver(vcpu, priority)) break; - } priority = find_next_bit(pending, BITS_PER_BYTE * sizeof(*pending), @@ -238,6 +186,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; int r = RESUME_HOST; + /* update before a new last_exit_type is rewritten */ + kvmppc_update_timing_stats(vcpu); + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -251,21 +202,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_EXTERNAL: + kvmppc_account_exit(vcpu, EXT_INTR_EXITS); + if (need_resched()) + cond_resched(); + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DECREMENTER: /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - - /* XXX At this point the TLB still holds our shadow TLB, so if - * we do reschedule the host will fault over it. Perhaps we - * should politely restore the host's entries to minimize - * misses before ceding control. */ + kvmppc_account_exit(vcpu, DEC_EXITS); if (need_resched()) cond_resched(); - if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) - vcpu->stat.dec_exits++; - else - vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -274,17 +223,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Program traps generated by user-level software must be handled * by the guest kernel. */ vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); r = RESUME_GUEST; + kvmppc_account_exit(vcpu, USR_PR_INST); break; } er = kvmppc_emulate_instruction(run, vcpu); switch (er) { case EMULATE_DONE: + /* don't overwrite subtypes, just account kvm_stats */ + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ - vcpu->stat.emulated_inst_exits++; r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: @@ -293,7 +244,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case EMULATE_FAIL: /* XXX Deliver Program interrupt to guest. */ - printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", + printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); /* For debugging, encode the failing instruction and * report it to userspace. */ @@ -307,48 +258,53 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_FP_UNAVAIL: - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); + kvmppc_account_exit(vcpu, FP_UNAVAIL); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_DATA_STORAGE: vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.dsi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); + kvmppc_account_exit(vcpu, DSI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE: vcpu->arch.esr = vcpu->arch.fault_esr; - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.isi_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); + kvmppc_account_exit(vcpu, ISI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.syscall_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_DTLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.fault_dear; + int gtlb_index; gfn_t gfn; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; - vcpu->stat.dtlb_real_miss_exits++; + kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; } + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; @@ -359,38 +315,45 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); - vcpu->stat.dtlb_virt_miss_exits++; + kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); + kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); + kvmppc_account_exit(vcpu, MMIO_EXITS); } break; } + /* XXX move to a 440-specific file. */ case BOOKE_INTERRUPT_ITLB_MISS: { - struct tlbe *gtlbe; + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + struct kvmppc_44x_tlbe *gtlbe; unsigned long eaddr = vcpu->arch.pc; + gpa_t gpaddr; gfn_t gfn; + int gtlb_index; r = RESUME_GUEST; /* Check the guest TLB. */ - gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); - if (!gtlbe) { + gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr); + if (gtlb_index < 0) { /* The guest didn't have a mapping for it. */ - kvmppc_queue_exception(vcpu, exit_nr); - vcpu->stat.itlb_real_miss_exits++; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); + kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS); break; } - vcpu->stat.itlb_virt_miss_exits++; + kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS); - gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; + gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; + gpaddr = tlb_xlate(gtlbe, eaddr); + gfn = gpaddr >> PAGE_SHIFT; if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { /* The guest TLB had a mapping, but the shadow TLB @@ -399,12 +362,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * b) the guest used a large mapping which we're faking * Either way, we need to satisfy the fault without * invoking the guest. */ - kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, - gtlbe->word2); + kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid, + gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index); } else { /* Guest mapped and leaped at non-RAM! */ - kvmppc_queue_exception(vcpu, - BOOKE_INTERRUPT_MACHINE_CHECK); + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK); } break; @@ -421,6 +383,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); r = RESUME_HOST; break; } @@ -432,10 +395,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_disable(); - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); - /* Do some exit accounting. */ - vcpu->stat.sum_exits++; if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -443,22 +404,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (signal_pending(current)) { run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - - vcpu->stat.signal_exits++; - } else { - vcpu->stat.light_exits++; - } - } else { - switch (run->exit_reason) { - case KVM_EXIT_MMIO: - vcpu->stat.mmio_exits++; - break; - case KVM_EXIT_DCR: - vcpu->stat.dcr_exits++; - break; - case KVM_EXIT_INTR: - vcpu->stat.signal_exits++; - break; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); } } @@ -468,20 +414,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - vcpu->arch.pc = 0; vcpu->arch.msr = 0; vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ @@ -492,12 +424,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) * before it's programmed its own IVPR. */ vcpu->arch.ivpr = 0x55550000; - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + kvmppc_init_timing_stats(vcpu); - return 0; + return kvmppc_core_vcpu_setup(vcpu); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -536,7 +465,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.ctr = regs->ctr; vcpu->arch.lr = regs->lr; vcpu->arch.xer = regs->xer; - vcpu->arch.msr = regs->msr; + kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.srr0 = regs->srr0; vcpu->arch.srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; @@ -575,31 +504,62 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { - struct tlbe *gtlbe; - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } + return kvmppc_core_vcpu_translate(vcpu, tr); +} - gtlbe = &vcpu->arch.guest_tlb[index]; +int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; - tr->physical_address = tlb_xlate(gtlbe, eaddr); - /* XXX what does "writeable" and "usermode" even mean? */ - tr->valid = 1; + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); return 0; } + +void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h new file mode 100644 index 000000000000..cf7c94ca24bf --- /dev/null +++ b/arch/powerpc/kvm/booke.h @@ -0,0 +1,60 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +#ifndef __KVM_BOOKE_H__ +#define __KVM_BOOKE_H__ + +#include <linux/types.h> +#include <linux/kvm_host.h> +#include "timing.h" + +/* interrupt priortity ordering */ +#define BOOKE_IRQPRIO_DATA_STORAGE 0 +#define BOOKE_IRQPRIO_INST_STORAGE 1 +#define BOOKE_IRQPRIO_ALIGNMENT 2 +#define BOOKE_IRQPRIO_PROGRAM 3 +#define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#define BOOKE_IRQPRIO_SYSCALL 5 +#define BOOKE_IRQPRIO_AP_UNAVAIL 6 +#define BOOKE_IRQPRIO_DTLB_MISS 7 +#define BOOKE_IRQPRIO_ITLB_MISS 8 +#define BOOKE_IRQPRIO_MACHINE_CHECK 9 +#define BOOKE_IRQPRIO_DEBUG 10 +#define BOOKE_IRQPRIO_CRITICAL 11 +#define BOOKE_IRQPRIO_WATCHDOG 12 +#define BOOKE_IRQPRIO_EXTERNAL 13 +#define BOOKE_IRQPRIO_FIT 14 +#define BOOKE_IRQPRIO_DECREMENTER 15 + +/* Helper function for "full" MSR writes. No need to call this if only EE is + * changing. */ +static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) +{ + if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); + + vcpu->arch.msr = new_msr; + + if (vcpu->arch.msr & MSR_WE) { + kvm_vcpu_block(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); + }; +} + +#endif /* __KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c deleted file mode 100644 index b480341bc31e..000000000000 --- a/arch/powerpc/kvm/booke_host.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - */ - -#include <linux/errno.h> -#include <linux/kvm_host.h> -#include <linux/module.h> -#include <asm/cacheflush.h> -#include <asm/kvm_ppc.h> - -unsigned long kvmppc_booke_handlers; - -static int kvmppc_booke_init(void) -{ - unsigned long ivor[16]; - unsigned long max_ivor = 0; - int i; - - /* We install our own exception handlers by hijacking IVPR. IVPR must - * be 16-bit aligned, so we need a 64KB allocation. */ - kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, - VCPU_SIZE_ORDER); - if (!kvmppc_booke_handlers) - return -ENOMEM; - - /* XXX make sure our handlers are smaller than Linux's */ - - /* Copy our interrupt handlers to match host IVORs. That way we don't - * have to swap the IVORs on every guest/host transition. */ - ivor[0] = mfspr(SPRN_IVOR0); - ivor[1] = mfspr(SPRN_IVOR1); - ivor[2] = mfspr(SPRN_IVOR2); - ivor[3] = mfspr(SPRN_IVOR3); - ivor[4] = mfspr(SPRN_IVOR4); - ivor[5] = mfspr(SPRN_IVOR5); - ivor[6] = mfspr(SPRN_IVOR6); - ivor[7] = mfspr(SPRN_IVOR7); - ivor[8] = mfspr(SPRN_IVOR8); - ivor[9] = mfspr(SPRN_IVOR9); - ivor[10] = mfspr(SPRN_IVOR10); - ivor[11] = mfspr(SPRN_IVOR11); - ivor[12] = mfspr(SPRN_IVOR12); - ivor[13] = mfspr(SPRN_IVOR13); - ivor[14] = mfspr(SPRN_IVOR14); - ivor[15] = mfspr(SPRN_IVOR15); - - for (i = 0; i < 16; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; - - memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); - } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); - - return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); -} - -static void __exit kvmppc_booke_exit(void) -{ - free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); - kvm_exit(); -} - -module_init(kvmppc_booke_init) -module_exit(kvmppc_booke_exit) diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 95e165baf85f..084ebcd7dd83 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -107,6 +107,18 @@ _GLOBAL(kvmppc_resume_host) li r6, 1 slw r6, r6, r5 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save exit time */ +1: + mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + bne 1b + stw r8, VCPU_TIMING_EXIT_TBL(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) +#endif + /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK beq ..skip_inst_copy @@ -335,54 +347,6 @@ lightweight_exit: lwz r3, VCPU_SHADOW_PID(r4) mtspr SPRN_PID, r3 - /* Prevent all asynchronous TLB updates. */ - mfmsr r5 - lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h - ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l - andc r6, r5, r6 - mtmsr r6 - - /* Load the guest mappings, leaving the host's "pinned" kernel mappings - * in place. */ - mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ - li r5, PPC44x_TLB_SIZE - lis r5, tlb_44x_hwater@ha - lwz r5, tlb_44x_hwater@l(r5) - mtctr r5 - addi r9, r4, VCPU_SHADOW_TLB - addi r5, r4, VCPU_SHADOW_MOD - li r3, 0 -1: - lbzx r7, r3, r5 - cmpwi r7, 0 - beq 3f - - /* Load guest entry. */ - mulli r11, r3, TLBE_BYTES - add r11, r11, r9 - lwz r7, 0(r11) - mtspr SPRN_MMUCR, r7 - lwz r7, 4(r11) - tlbwe r7, r3, PPC44x_TLB_PAGEID - lwz r7, 8(r11) - tlbwe r7, r3, PPC44x_TLB_XLAT - lwz r7, 12(r11) - tlbwe r7, r3, PPC44x_TLB_ATTRIB -3: - addi r3, r3, 1 /* Increment index. */ - bdnz 1b - - mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ - - /* Clear bitmap of modified TLB entries */ - li r5, PPC44x_TLB_SIZE>>2 - mtctr r5 - addi r5, r4, VCPU_SHADOW_MOD - 4 - li r6, 0 -1: - stwu r6, 4(r5) - bdnz 1b - iccci 0, 0 /* XXX hack */ /* Load some guest volatiles. */ @@ -423,6 +387,18 @@ lightweight_exit: lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 +#ifdef CONFIG_KVM_EXIT_TIMING + /* save enter time */ +1: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + bne 1b + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 0fce4fbdc20d..d1d38daa93fb 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -23,161 +23,14 @@ #include <linux/string.h> #include <linux/kvm_host.h> -#include <asm/dcr.h> -#include <asm/dcr-regs.h> +#include <asm/reg.h> #include <asm/time.h> #include <asm/byteorder.h> #include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include "timing.h" -#include "44x_tlb.h" - -/* Instruction decoding */ -static inline unsigned int get_op(u32 inst) -{ - return inst >> 26; -} - -static inline unsigned int get_xop(u32 inst) -{ - return (inst >> 1) & 0x3ff; -} - -static inline unsigned int get_sprn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_dcrn(u32 inst) -{ - return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); -} - -static inline unsigned int get_rt(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_rs(u32 inst) -{ - return (inst >> 21) & 0x1f; -} - -static inline unsigned int get_ra(u32 inst) -{ - return (inst >> 16) & 0x1f; -} - -static inline unsigned int get_rb(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_rc(u32 inst) -{ - return inst & 0x1; -} - -static inline unsigned int get_ws(u32 inst) -{ - return (inst >> 11) & 0x1f; -} - -static inline unsigned int get_d(u32 inst) -{ - return inst & 0xffff; -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) -{ - u64 eaddr; - u64 raddr; - u64 asid; - u32 flags; - struct tlbe *tlbe; - unsigned int ra; - unsigned int rs; - unsigned int ws; - unsigned int index; - - ra = get_ra(inst); - rs = get_rs(inst); - ws = get_ws(inst); - - index = vcpu->arch.gpr[ra]; - if (index > PPC44x_TLB_SIZE) { - printk("%s: index %d\n", __func__, index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu->arch.guest_tlb[index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ - if (tlbe->word0 & PPC44x_TLB_VALID) { - eaddr = get_tlb_eaddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); - } - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = vcpu->arch.mmucr & 0xff; - tlbe->word0 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = vcpu->arch.gpr[rs]; - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = vcpu->arch.gpr[rs]; - break; - - default: - return EMULATE_FAIL; - } - - if (tlbe_is_host_safe(vcpu, tlbe)) { - eaddr = get_tlb_eaddr(tlbe); - raddr = get_tlb_raddr(tlbe); - asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - flags = tlbe->word2 & 0xffff; - - /* Create a 4KB mapping on the host. If the guest wanted a - * large page, only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); - } - - KVMTRACE_5D(GTLB_WRITE, vcpu, index, - tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, - handler); - - return EMULATE_DONE; -} - -static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) { if (vcpu->arch.tcr & TCR_DIE) { /* The decrementer ticks at the same rate as the timebase, so @@ -193,12 +46,6 @@ static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) } } -static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); -} - /* XXX to do: * lhax * lhaux @@ -213,40 +60,30 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) * * XXX is_bigendian should depend on MMU mapping or MSR[LE] */ +/* XXX Should probably auto-generate instruction decoding for a particular core + * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst = vcpu->arch.last_inst; u32 ea; int ra; int rb; - int rc; int rs; int rt; int sprn; - int dcrn; enum emulation_result emulated = EMULATE_DONE; int advance = 1; + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + switch (get_op(inst)) { - case 3: /* trap */ - printk("trap!\n"); - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + case 3: /* trap */ + vcpu->arch.esr |= ESR_PTR; + kvmppc_core_queue_program(vcpu); advance = 0; break; - case 19: - switch (get_xop(inst)) { - case 50: /* rfi */ - kvmppc_emul_rfi(vcpu); - advance = 0; - break; - - default: - emulated = EMULATE_FAIL; - break; - } - break; - case 31: switch (get_xop(inst)) { @@ -255,27 +92,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - case 83: /* mfmsr */ - rt = get_rt(inst); - vcpu->arch.gpr[rt] = vcpu->arch.msr; - break; - case 87: /* lbzx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case 131: /* wrtee */ - rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (vcpu->arch.gpr[rs] & MSR_EE); - break; - - case 146: /* mtmsr */ - rs = get_rs(inst); - kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); - break; - case 151: /* stwx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -283,11 +104,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; - case 163: /* wrteei */ - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) - | (inst & MSR_EE); - break; - case 215: /* stbx */ rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -328,42 +144,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 323: /* mfdcr */ - dcrn = get_dcrn(inst); - rt = get_rt(inst); - - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 339: /* mfspr */ sprn = get_sprn(inst); rt = get_rt(inst); @@ -373,26 +153,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; case SPRN_SRR1: vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; - case SPRN_MMUCR: - vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; - case SPRN_PID: - vcpu->arch.gpr[rt] = vcpu->arch.pid; break; - case SPRN_IVPR: - vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; - case SPRN_CCR0: - vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; - case SPRN_CCR1: - vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; case SPRN_PVR: vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; - case SPRN_DEAR: - vcpu->arch.gpr[rt] = vcpu->arch.dear; break; - case SPRN_ESR: - vcpu->arch.gpr[rt] = vcpu->arch.esr; break; - case SPRN_DBCR0: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; - case SPRN_DBCR1: - vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. @@ -413,42 +175,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* Note: SPRG4-7 are user-readable, so we don't get * a trap. */ - case SPRN_IVOR0: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; - case SPRN_IVOR1: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; - case SPRN_IVOR2: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; - case SPRN_IVOR3: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; - case SPRN_IVOR4: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; - case SPRN_IVOR5: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; - case SPRN_IVOR6: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; - case SPRN_IVOR7: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; - case SPRN_IVOR8: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; - case SPRN_IVOR9: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; - case SPRN_IVOR10: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; - case SPRN_IVOR11: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; - case SPRN_IVOR12: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; - case SPRN_IVOR13: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; - case SPRN_IVOR14: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; - case SPRN_IVOR15: - vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; - default: - printk("mfspr: unknown spr %x\n", sprn); - vcpu->arch.gpr[rt] = 0; + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); + if (emulated == EMULATE_FAIL) { + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; + } break; } break; @@ -478,25 +210,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.gpr[ra] = ea; break; - case 451: /* mtdcr */ - dcrn = get_dcrn(inst); - rs = get_rs(inst); - - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = vcpu->arch.gpr[rs]; - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - emulated = EMULATE_DO_DCR; - } - - break; - case 467: /* mtspr */ sprn = get_sprn(inst); rs = get_rs(inst); @@ -505,22 +218,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; case SPRN_SRR1: vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; - case SPRN_MMUCR: - vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; - case SPRN_PID: - kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; - case SPRN_CCR0: - vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; - case SPRN_DEAR: - vcpu->arch.dear = vcpu->arch.gpr[rs]; break; - case SPRN_ESR: - vcpu->arch.esr = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR0: - vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; - case SPRN_DBCR1: - vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ @@ -532,14 +229,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_emulate_dec(vcpu); break; - case SPRN_TSR: - vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; - - case SPRN_TCR: - vcpu->arch.tcr = vcpu->arch.gpr[rs]; - kvmppc_emulate_dec(vcpu); - break; - case SPRN_SPRG0: vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; case SPRN_SPRG1: @@ -549,56 +238,10 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case SPRN_SPRG3: vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; - /* Note: SPRG4-7 are user-readable. These values are - * loaded into the real SPRGs when resuming the - * guest. */ - case SPRN_SPRG4: - vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG5: - vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG6: - vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; - case SPRN_SPRG7: - vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; - - case SPRN_IVPR: - vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR0: - vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR1: - vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR2: - vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR3: - vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR4: - vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR5: - vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR6: - vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR7: - vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR8: - vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR9: - vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR10: - vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR11: - vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR12: - vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR13: - vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR14: - vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; - case SPRN_IVOR15: - vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; - default: - printk("mtspr: unknown spr %x\n", sprn); - emulated = EMULATE_FAIL; + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); + if (emulated == EMULATE_FAIL) + printk("mtspr: unknown spr %x\n", sprn); break; } break; @@ -629,36 +272,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 0); break; - case 978: /* tlbwe */ - emulated = kvmppc_emul_tlbwe(vcpu, inst); - break; - - case 914: { /* tlbsx */ - int index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - rc = get_rc(inst); - - ea = vcpu->arch.gpr[rb]; - if (ra) - ea += vcpu->arch.gpr[ra]; - - index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - if (index < 0) - vcpu->arch.cr &= ~0x20000000; - else - vcpu->arch.cr |= 0x20000000; - } - vcpu->arch.gpr[rt] = index; - - } - break; - case 790: /* lhbrx */ rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); @@ -674,14 +287,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 2, 0); break; - case 966: /* iccci */ - break; - default: - printk("unknown: op %d xop %d\n", get_op(inst), - get_xop(inst)); + /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; - break; } break; @@ -764,12 +372,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: - printk("unknown op %d\n", get_op(inst)); emulated = EMULATE_FAIL; - break; } - KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); + if (emulated == EMULATE_FAIL) { + emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); + if (emulated == EMULATE_FAIL) { + advance = 0; + printk(KERN_ERR "Couldn't emulate instruction 0x%08x " + "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); + } + } + + KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit); if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8bef0efcdfe1..2822c8ccfaaf 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -28,9 +28,9 @@ #include <asm/uaccess.h> #include <asm/kvm_ppc.h> #include <asm/tlbflush.h> +#include "timing.h" #include "../mm/mmu_decl.h" - gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { return gfn; @@ -99,14 +99,7 @@ void kvm_arch_hardware_unsetup(void) void kvm_arch_check_processor_compat(void *rtn) { - int r; - - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) - r = 0; - else - r = -ENOTSUPP; - - *(int *)rtn = r; + *(int *)rtn = kvmppc_core_check_processor_compat(); } struct kvm *kvm_arch_create_vm(void) @@ -144,9 +137,6 @@ int kvm_dev_ioctl_check_extension(long ext) int r; switch (ext) { - case KVM_CAP_USER_MEMORY: - r = 1; - break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; @@ -179,30 +169,15 @@ void kvm_arch_flush_shadow(struct kvm *kvm) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; - int err; - - vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu) { - err = -ENOMEM; - goto out; - } - - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - + vcpu = kvmppc_core_vcpu_create(kvm, id); + kvmppc_create_vcpu_debugfs(vcpu, id); return vcpu; - -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu); -out: - return ERR_PTR(err); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu); + kvmppc_remove_vcpu_debugfs(vcpu); + kvmppc_core_vcpu_free(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -212,16 +187,14 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; - - return test_bit(priority, &vcpu->arch.pending_exceptions); + return kvmppc_core_pending_dec(vcpu); } static void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); + kvmppc_core_queue_dec(vcpu); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -242,96 +215,25 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_core_destroy_mmu(vcpu); } -/* Note: clearing MSR[DE] just means that the debug interrupt will not be - * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. - * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt - * will be delivered as an "imprecise debug event" (which is indicated by - * DBSR[IDE]. - */ -static void kvmppc_disable_debug_interrupts(void) -{ - mtmsr(mfmsr() & ~MSR_DE); -} - -static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) -{ - kvmppc_disable_debug_interrupts(); - - mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); - mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); - mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); - mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); - mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); - mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); - mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); - mtmsr(vcpu->arch.host_msr); -} - -static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) -{ - struct kvm_guest_debug *dbg = &vcpu->guest_debug; - u32 dbcr0 = 0; - - vcpu->arch.host_msr = mfmsr(); - kvmppc_disable_debug_interrupts(); - - /* Save host debug register state. */ - vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); - vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); - vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); - vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); - vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); - vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); - vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); - - /* set registers up for guest */ - - if (dbg->bp[0]) { - mtspr(SPRN_IAC1, dbg->bp[0]); - dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; - } - if (dbg->bp[1]) { - mtspr(SPRN_IAC2, dbg->bp[1]); - dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; - } - if (dbg->bp[2]) { - mtspr(SPRN_IAC3, dbg->bp[2]); - dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; - } - if (dbg->bp[3]) { - mtspr(SPRN_IAC4, dbg->bp[3]); - dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; - } - - mtspr(SPRN_DBCR0, dbcr0); - mtspr(SPRN_DBCR1, 0); - mtspr(SPRN_DBCR2, 0); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - int i; - if (vcpu->guest_debug.enabled) - kvmppc_load_guest_debug_registers(vcpu); + kvmppc_core_load_guest_debugstate(vcpu); - /* Mark every guest entry in the shadow TLB entry modified, so that they - * will all be reloaded on the next vcpu run (instead of being - * demand-faulted). */ - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_tlbe_set_modified(vcpu, i); + kvmppc_core_vcpu_load(vcpu, cpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { if (vcpu->guest_debug.enabled) - kvmppc_restore_host_debug_state(vcpu); + kvmppc_core_load_host_debugstate(vcpu); /* Don't leave guest TLB entries resident when being de-scheduled. */ /* XXX It would be nice to differentiate between heavyweight exit and * sched_out here, since we could avoid the TLB flush for heavyweight * exits. */ _tlbil_all(); + kvmppc_core_vcpu_put(vcpu); } int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, @@ -355,14 +257,14 @@ int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; *gpr = run->dcr.data; } static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; if (run->mmio.len > sizeof(*gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -460,7 +362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->arch.dcr_needed = 0; } - kvmppc_check_and_deliver_interrupts(vcpu); + kvmppc_core_deliver_interrupts(vcpu); local_irq_disable(); kvm_guest_enter(); @@ -478,7 +380,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + kvmppc_core_queue_external(vcpu, irq); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c new file mode 100644 index 000000000000..47ee603f558e --- /dev/null +++ b/arch/powerpc/kvm/timing.c @@ -0,0 +1,239 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#include <linux/kvm_host.h> +#include <linux/fs.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> + +#include <asm/time.h> +#include <asm-generic/div64.h> + +#include "timing.h" + +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) +{ + int i; + + /* pause guest execution to avoid concurrent updates */ + local_irq_disable(); + mutex_lock(&vcpu->mutex); + + vcpu->arch.last_exit_type = 0xDEAD; + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + vcpu->arch.timing_count_type[i] = 0; + vcpu->arch.timing_max_duration[i] = 0; + vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF; + vcpu->arch.timing_sum_duration[i] = 0; + vcpu->arch.timing_sum_quad_duration[i] = 0; + } + vcpu->arch.timing_last_exit = 0; + vcpu->arch.timing_exit.tv64 = 0; + vcpu->arch.timing_last_enter.tv64 = 0; + + mutex_unlock(&vcpu->mutex); + local_irq_enable(); +} + +static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type) +{ + u64 old; + + do_div(duration, tb_ticks_per_usec); + if (unlikely(duration > 0xFFFFFFFF)) { + printk(KERN_ERR"%s - duration too big -> overflow" + " duration %lld type %d exit #%d\n", + __func__, duration, type, + vcpu->arch.timing_count_type[type]); + return; + } + + vcpu->arch.timing_count_type[type]++; + + /* sum */ + old = vcpu->arch.timing_sum_duration[type]; + vcpu->arch.timing_sum_duration[type] += duration; + if (unlikely(old > vcpu->arch.timing_sum_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, vcpu->arch.timing_sum_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* square sum */ + old = vcpu->arch.timing_sum_quad_duration[type]; + vcpu->arch.timing_sum_quad_duration[type] += (duration*duration); + if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) { + printk(KERN_ERR"%s - wrap adding sum of squared durations" + " old %lld new %lld type %d exit # of type %d\n", + __func__, old, + vcpu->arch.timing_sum_quad_duration[type], + type, vcpu->arch.timing_count_type[type]); + } + + /* set min/max */ + if (unlikely(duration < vcpu->arch.timing_min_duration[type])) + vcpu->arch.timing_min_duration[type] = duration; + if (unlikely(duration > vcpu->arch.timing_max_duration[type])) + vcpu->arch.timing_max_duration[type] = duration; +} + +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) +{ + u64 exit = vcpu->arch.timing_last_exit; + u64 enter = vcpu->arch.timing_last_enter.tv64; + + /* save exit time, used next exit when the reenter time is known */ + vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64; + + if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0)) + return; /* skip incomplete cycle (e.g. after reset) */ + + /* update statistics for average and standard deviation */ + add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type); + /* enter -> timing_last_exit is time spent in guest - log this too */ + add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter), + TIMEINGUEST); +} + +static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { + [MMIO_EXITS] = "MMIO", + [DCR_EXITS] = "DCR", + [SIGNAL_EXITS] = "SIGNAL", + [ITLB_REAL_MISS_EXITS] = "ITLBREAL", + [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", + [DTLB_REAL_MISS_EXITS] = "DTLBREAL", + [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT", + [SYSCALL_EXITS] = "SYSCALL", + [ISI_EXITS] = "ISI", + [DSI_EXITS] = "DSI", + [EMULATED_INST_EXITS] = "EMULINST", + [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT", + [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE", + [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR", + [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR", + [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR", + [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR", + [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX", + [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE", + [EMULATED_RFI_EXITS] = "EMUL_RFI", + [DEC_EXITS] = "DEC", + [EXT_INTR_EXITS] = "EXTINT", + [HALT_WAKEUP] = "HALT", + [USR_PR_INST] = "USR_PR_INST", + [FP_UNAVAIL] = "FP_UNAVAIL", + [DEBUG_EXITS] = "DEBUG", + [TIMEINGUEST] = "TIMEINGUEST" +}; + +static int kvmppc_exit_timing_show(struct seq_file *m, void *private) +{ + struct kvm_vcpu *vcpu = m->private; + int i; + + seq_printf(m, "%s", "type count min max sum sum_squared\n"); + + for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) { + seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n", + kvm_exit_names[i], + vcpu->arch.timing_count_type[i], + vcpu->arch.timing_min_duration[i], + vcpu->arch.timing_max_duration[i], + vcpu->arch.timing_sum_duration[i], + vcpu->arch.timing_sum_quad_duration[i]); + } + return 0; +} + +/* Write 'c' to clear the timing statistics. */ +static ssize_t kvmppc_exit_timing_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int err = -EINVAL; + char c; + + if (count > 1) { + goto done; + } + + if (get_user(c, user_buf)) { + err = -EFAULT; + goto done; + } + + if (c == 'c') { + struct seq_file *seqf = (struct seq_file *)file->private_data; + struct kvm_vcpu *vcpu = seqf->private; + /* Write does not affect our buffers previously generated with + * show. seq_file is locked here to prevent races of init with + * a show call */ + mutex_lock(&seqf->lock); + kvmppc_init_timing_stats(vcpu); + mutex_unlock(&seqf->lock); + err = count; + } + +done: + return err; +} + +static int kvmppc_exit_timing_open(struct inode *inode, struct file *file) +{ + return single_open(file, kvmppc_exit_timing_show, inode->i_private); +} + +static struct file_operations kvmppc_exit_timing_fops = { + .owner = THIS_MODULE, + .open = kvmppc_exit_timing_open, + .read = seq_read, + .write = kvmppc_exit_timing_write, + .llseek = seq_lseek, + .release = single_release, +}; + +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id) +{ + static char dbg_fname[50]; + struct dentry *debugfs_file; + + snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing", + current->pid, id); + debugfs_file = debugfs_create_file(dbg_fname, 0666, + kvm_debugfs_dir, vcpu, + &kvmppc_exit_timing_fops); + + if (!debugfs_file) { + printk(KERN_ERR"%s: error creating debugfs file %s\n", + __func__, dbg_fname); + return; + } + + vcpu->arch.debugfs_exit_timing = debugfs_file; +} + +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.debugfs_exit_timing) { + debugfs_remove(vcpu->arch.debugfs_exit_timing); + vcpu->arch.debugfs_exit_timing = NULL; + } +} diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h new file mode 100644 index 000000000000..bb13b1f3cd5a --- /dev/null +++ b/arch/powerpc/kvm/timing.h @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + */ + +#ifndef __POWERPC_KVM_EXITTIMING_H__ +#define __POWERPC_KVM_EXITTIMING_H__ + +#include <linux/kvm_host.h> +#include <asm/kvm_host.h> + +#ifdef CONFIG_KVM_EXIT_TIMING +void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu); +void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id); +void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu); + +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) +{ + vcpu->arch.last_exit_type = type; +} + +#else +/* if exit timing is not configured there is no need to build the c file */ +static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, + unsigned int id) {} +static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {} +static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} +#endif /* CONFIG_KVM_EXIT_TIMING */ + +/* account the exit in kvm_stats */ +static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) +{ + /* type has to be known at build time for optimization */ + BUILD_BUG_ON(__builtin_constant_p(type)); + switch (type) { + case EXT_INTR_EXITS: + vcpu->stat.ext_intr_exits++; + break; + case DEC_EXITS: + vcpu->stat.dec_exits++; + break; + case EMULATED_INST_EXITS: + vcpu->stat.emulated_inst_exits++; + break; + case DCR_EXITS: + vcpu->stat.dcr_exits++; + break; + case DSI_EXITS: + vcpu->stat.dsi_exits++; + break; + case ISI_EXITS: + vcpu->stat.isi_exits++; + break; + case SYSCALL_EXITS: + vcpu->stat.syscall_exits++; + break; + case DTLB_REAL_MISS_EXITS: + vcpu->stat.dtlb_real_miss_exits++; + break; + case DTLB_VIRT_MISS_EXITS: + vcpu->stat.dtlb_virt_miss_exits++; + break; + case MMIO_EXITS: + vcpu->stat.mmio_exits++; + break; + case ITLB_REAL_MISS_EXITS: + vcpu->stat.itlb_real_miss_exits++; + break; + case ITLB_VIRT_MISS_EXITS: + vcpu->stat.itlb_virt_miss_exits++; + break; + case SIGNAL_EXITS: + vcpu->stat.signal_exits++; + break; + } +} + +/* wrapper to set exit time and account for it in kvm_stats */ +static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type) +{ + kvmppc_set_exit_type(vcpu, type); + kvmppc_account_exit_stat(vcpu, type); +} + +#endif /* __POWERPC_KVM_EXITTIMING_H__ */ diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2949126d28d1..6b793aeda72e 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -297,7 +297,7 @@ static inline unsigned long fast_get_dcookie(struct path *path) { unsigned long cookie; - if (path->dentry->d_cookie) + if (path->dentry->d_flags & DCACHE_COOKIE) return (unsigned long)path->dentry; get_dcookie(path, &cookie); return cookie; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index f7a69021b7bf..84e058f1e1cc 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -332,7 +332,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) +static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -870,7 +870,7 @@ void xics_migrate_irqs_away(void) /* Reset affinity to all cpus */ irq_desc[virq].affinity = CPU_MASK_ALL; - desc->chip->set_affinity(virq, CPU_MASK_ALL); + desc->chip->set_affinity(virq, cpu_all_mask); unlock: spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index c82babb70074..3e0d89dcdba2 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -806,7 +806,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -818,7 +818,7 @@ void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) } else { cpumask_t tmp; - cpus_and(tmp, cpumask, cpu_online_map); + cpumask_and(&tmp, cpumask, cpu_online_mask); mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 6209c62a426d..3cef2af10f42 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, cpumask_t cpumask); +extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8152fefc97b9..19577aeffd7b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -83,6 +83,7 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK + select INIT_ALL_POSSIBLE source "init/Kconfig" diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index e5a6a9ba3adf..d60a2eefb17b 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -14,7 +14,6 @@ struct s390_idle_data { spinlock_t lock; - unsigned int in_idle; unsigned long long idle_count; unsigned long long idle_enter; unsigned long long idle_time; @@ -22,12 +21,12 @@ struct s390_idle_data { DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void s390_idle_leave(void); +void vtime_start_cpu(void); static inline void s390_idle_check(void) { - if ((&__get_cpu_var(s390_idle))->in_idle) - s390_idle_leave(); + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) + vtime_start_cpu(); } #endif /* _ASM_S390_CPU_H_ */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 133ce054fc89..521726430afa 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -11,7 +11,7 @@ #include <asm/div64.h> -/* We want to use micro-second resolution. */ +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long cputime_t; typedef unsigned long long cputime64_t; @@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base) #define cputime_ge(__a, __b) ((__a) >= (__b)) #define cputime_lt(__a, __b) ((__a) < (__b)) #define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) +#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) #define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) +#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) #define cputime64_zero (0ULL) #define cputime64_add(__a, __b) ((__a) + (__b)) @@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base) static inline u64 cputime64_to_jiffies64(cputime64_t cputime) { - do_div(cputime, 1000000 / HZ); + do_div(cputime, 4096000000ULL / HZ); return cputime; } @@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime) static inline unsigned int cputime_to_msecs(const cputime_t cputime) { - return __div(cputime, 1000); + return __div(cputime, 4096000); } static inline cputime_t msecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 1000; + return (cputime_t) m * 4096000; } /* @@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m) static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 1000000); + return __div(cputime, 2048000000) >> 1; } static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 1000000; + return (cputime_t) s * 4096000000ULL; } /* @@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s) static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; + return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_nsec = rp.subreg.even * 1000; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 1000000) * 1000; - value->tv_sec = cputime / 1000000; + value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec + (u64) value->tv_sec * 1000000; + return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_usec = rp.subreg.even; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = cputime % 1000000; - value->tv_sec = cputime / 1000000; + value->tv_usec = cputime % 4096000000ULL; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (1000000 / USER_HZ); + return (cputime_t) x * (4096000000ULL / USER_HZ); } /* @@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x) static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0bc51d52a899..ffdef5fe8587 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -67,11 +67,11 @@ #define __LC_SYNC_ENTER_TIMER 0x248 #define __LC_ASYNC_ENTER_TIMER 0x250 #define __LC_EXIT_TIMER 0x258 -#define __LC_LAST_UPDATE_TIMER 0x260 -#define __LC_USER_TIMER 0x268 -#define __LC_SYSTEM_TIMER 0x270 -#define __LC_LAST_UPDATE_CLOCK 0x278 -#define __LC_STEAL_CLOCK 0x280 +#define __LC_USER_TIMER 0x260 +#define __LC_SYSTEM_TIMER 0x268 +#define __LC_STEAL_TIMER 0x270 +#define __LC_LAST_UPDATE_TIMER 0x278 +#define __LC_LAST_UPDATE_CLOCK 0x280 #define __LC_RETURN_MCCK_PSW 0x288 #define __LC_KERNEL_STACK 0xC40 #define __LC_THREAD_INFO 0xC44 @@ -89,11 +89,11 @@ #define __LC_SYNC_ENTER_TIMER 0x250 #define __LC_ASYNC_ENTER_TIMER 0x258 #define __LC_EXIT_TIMER 0x260 -#define __LC_LAST_UPDATE_TIMER 0x268 -#define __LC_USER_TIMER 0x270 -#define __LC_SYSTEM_TIMER 0x278 -#define __LC_LAST_UPDATE_CLOCK 0x280 -#define __LC_STEAL_CLOCK 0x288 +#define __LC_USER_TIMER 0x268 +#define __LC_SYSTEM_TIMER 0x270 +#define __LC_STEAL_TIMER 0x278 +#define __LC_LAST_UPDATE_TIMER 0x280 +#define __LC_LAST_UPDATE_CLOCK 0x288 #define __LC_RETURN_MCCK_PSW 0x290 #define __LC_KERNEL_STACK 0xD40 #define __LC_THREAD_INFO 0xD48 @@ -106,8 +106,10 @@ #define __LC_IPLDEV 0xDB8 #define __LC_CURRENT 0xDD8 #define __LC_INT_CLOCK 0xDE8 +#define __LC_VDSO_PER_CPU 0xE38 #endif /* __s390x__ */ +#define __LC_PASTE 0xE40 #define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ @@ -252,11 +254,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x248 */ __u64 async_enter_timer; /* 0x250 */ __u64 exit_timer; /* 0x258 */ - __u64 last_update_timer; /* 0x260 */ - __u64 user_timer; /* 0x268 */ - __u64 system_timer; /* 0x270 */ - __u64 last_update_clock; /* 0x278 */ - __u64 steal_clock; /* 0x280 */ + __u64 user_timer; /* 0x260 */ + __u64 system_timer; /* 0x268 */ + __u64 steal_timer; /* 0x270 */ + __u64 last_update_timer; /* 0x278 */ + __u64 last_update_clock; /* 0x280 */ psw_t return_mcck_psw; /* 0x288 */ __u8 pad8[0xc00-0x290]; /* 0x290 */ @@ -343,11 +345,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x250 */ __u64 async_enter_timer; /* 0x258 */ __u64 exit_timer; /* 0x260 */ - __u64 last_update_timer; /* 0x268 */ - __u64 user_timer; /* 0x270 */ - __u64 system_timer; /* 0x278 */ - __u64 last_update_clock; /* 0x280 */ - __u64 steal_clock; /* 0x288 */ + __u64 user_timer; /* 0x268 */ + __u64 system_timer; /* 0x270 */ + __u64 steal_timer; /* 0x278 */ + __u64 last_update_timer; /* 0x280 */ + __u64 last_update_clock; /* 0x288 */ psw_t return_mcck_psw; /* 0x290 */ __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ /* System info area */ @@ -381,7 +383,12 @@ struct _lowcore /* whether the kernel died with panic() or not */ __u32 panic_magic; /* 0xe00 */ - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ + /* Per cpu primary space access list */ + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ + __u32 vdso_per_cpu_data; /* 0xe3c */ + __u32 paste[16]; /* 0xe40 */ + + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ /* 64 bit extparam used for pfault, diag 250 etc */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 024ef42ed6d7..3a8b26eb1f2e 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *); +extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); @@ -121,7 +121,7 @@ static inline void cmma_init(void) { } #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev); \ + account_vtime(prev, current); \ } while (0) #define nop() asm volatile("nop") diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c1eaf9604da7..c544aa524535 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -47,6 +47,8 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + __u64 user_timer; + __u64 system_timer; }; /* diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index 61705d60f995..e4bcab739c19 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -23,20 +23,18 @@ struct vtimer_list { __u64 expires; __u64 interval; - spinlock_t lock; - unsigned long magic; - void (*function)(unsigned long); unsigned long data; }; -/* the offset value will wrap after ca. 71 years */ +/* the vtimer value will wrap after ca. 71 years */ struct vtimer_queue { struct list_head list; spinlock_t lock; - __u64 to_expire; /* current event expire time */ - __u64 offset; /* list offset to zero */ - __u64 idle; /* temp var for idle */ + __u64 timer; /* last programmed timer */ + __u64 elapsed; /* elapsed time of timer expire values */ + __u64 idle; /* temp var for idle */ + int do_spt; /* =1: reprogram cpu timer in idle */ }; extern void init_virt_timer(struct vtimer_list *timer); @@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_start_cpu_timer(void); -extern void vtime_stop_cpu_timer(void); +extern void vtime_stop_cpu(void); +extern void vtime_start_leave(void); #endif /* __KERNEL__ */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index a44f4fe16a35..7bdd7c8ebc91 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,9 +12,9 @@ #ifndef __ASSEMBLY__ /* - * Note about this structure: + * Note about the vdso_data and vdso_per_cpu_data structures: * - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the * structure is supposed to be known only to the function in the vdso * itself and may change without notice. */ @@ -28,10 +28,21 @@ struct vdso_data { __u64 wtom_clock_nsec; /* 0x28 */ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; + __u64 ectg_user_time; }; extern struct vdso_data *vdso_data; +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e641f60bac99..67a60016babb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -48,6 +48,11 @@ int main(void) DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_ECTG_BASE, + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, + offsetof(struct vdso_per_cpu_data, ectg_user_time)); /* constants used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 55de521aef77..1268aa2991bf 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -583,8 +583,8 @@ kernel_per: .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 @@ -723,8 +723,8 @@ io_notify_resume: .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 @@ -750,6 +750,7 @@ __critical_end: .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 16bb4fd1a403..c6fbde13971a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ .if !\sync ni \psworg+1,0xfd # clear wait state bit .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user lpswe \psworg # back to caller .endm @@ -559,8 +562,8 @@ kernel_per: */ .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 @@ -721,8 +724,8 @@ io_notify_resume: */ .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 @@ -746,6 +749,7 @@ __critical_end: */ .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -979,23 +983,23 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: .quad sysc_done - 4 - .quad sysc_done - 8 + .quad sysc_done - 16 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1005,23 +1009,23 @@ cleanup_io_return: cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: .quad io_done - 4 - .quad io_done - 8 + .quad io_done - 16 /* * Integer constants diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3ccd36b24b8f..f9f70aa15244 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -87,6 +87,8 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # diff --git a/arch/s390/kernel/init_task.c b/arch/s390/kernel/init_task.c index e80716843619..7db95c0b8693 100644 --- a/arch/s390/kernel/init_task.c +++ b/arch/s390/kernel/init_task.c @@ -16,7 +16,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 04f8c67a6101..b6110bdf8dc2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -38,6 +38,7 @@ #include <linux/utsname.h> #include <linux/tick.h> #include <linux/elfcore.h> +#include <linux/kernel_stat.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -45,7 +46,6 @@ #include <asm/processor.h> #include <asm/irq.h> #include <asm/timer.h> -#include <asm/cpu.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) -}; - -static int s390_idle_enter(void) -{ - struct s390_idle_data *idle; - - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_count++; - idle->in_idle = 1; - idle->idle_enter = get_clock(); - spin_unlock(&idle->lock); - vtime_stop_cpu_timer(); - return NOTIFY_OK; -} - -void s390_idle_leave(void) -{ - struct s390_idle_data *idle; - - vtime_start_cpu_timer(); - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_time += get_clock() - idle->idle_enter; - idle->in_idle = 0; - spin_unlock(&idle->lock); -} - extern void s390_handle_mcck(void); /* * The idle loop on a S390... @@ -117,10 +87,6 @@ static void default_idle(void) local_irq_enable(); return; } - if (s390_idle_enter() == NOTIFY_BAD) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); @@ -130,7 +96,6 @@ static void default_idle(void) local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); - s390_idle_leave(); local_irq_enable(); s390_handle_mcck(); return; @@ -138,9 +103,9 @@ static void default_idle(void) trace_hardirqs_on(); /* Don't trace preempt off for idle. */ stop_critical_timings(); - /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + /* Stop virtual timer and halt the cpu. */ + vtime_stop_cpu(); + /* Reenable preemption tracer. */ start_critical_timings(); } diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index e019b419efc6..a0d2d55d7fb3 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b7a1efd5522c..d825f4950e4e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -427,6 +427,8 @@ setup_lowcore(void) /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6fc78541dc57..9c0ccb532a45 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,6 +47,7 @@ #include <asm/lowcore.h> #include <asm/sclp.h> #include <asm/cpu.h> +#include <asm/vdso.h> #include "entry.h" /* @@ -55,12 +56,6 @@ struct _lowcore *lowcore_ptr[NR_CPUS]; EXPORT_SYMBOL(lowcore_ptr); -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - -cpumask_t cpu_possible_map = CPU_MASK_ALL; -EXPORT_SYMBOL(cpu_possible_map); - static struct task_struct *current_set[NR_CPUS]; static u8 smp_cpu_type; @@ -506,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu) goto out; lowcore->extended_save_area_addr = (u32) save_area; } +#else + if (vdso_alloc_per_cpu(cpu, lowcore)) + goto out; #endif lowcore_ptr[cpu] = lowcore; return 0; @@ -528,6 +526,8 @@ static void smp_free_lowcore(int cpu) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) free_page((unsigned long) lowcore->extended_save_area_addr); +#else + vdso_free_per_cpu(cpu, lowcore); #endif free_page(lowcore->panic_stack - PAGE_SIZE); free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); @@ -670,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); panic_stack = __get_free_page(GFP_KERNEL); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + BUG_ON(!lowcore || !panic_stack || !async_stack); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) save_area = get_zeroed_page(GFP_KERNEL); @@ -683,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; +#else + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); @@ -851,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev, unsigned long long idle_count; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); + spin_lock(&idle->lock); idle_count = idle->idle_count; - spin_unlock_irq(&idle->lock); + if (idle->idle_enter) + idle_count++; + spin_unlock(&idle->lock); return sprintf(buf, "%llu\n", idle_count); } static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -862,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; - unsigned long long new_time; + unsigned long long now, idle_time, idle_enter; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - if (idle->in_idle) { - new_time = get_clock(); - idle->idle_time += new_time - idle->idle_enter; - idle->idle_enter = new_time; - } - new_time = idle->idle_time; - spin_unlock_irq(&idle->lock); - return sprintf(buf, "%llu\n", new_time >> 12); + spin_lock(&idle->lock); + now = get_clock(); + idle_time = idle->idle_time; + idle_enter = idle->idle_enter; + if (idle_enter != 0ULL && idle_enter < now) + idle_time += now - idle_enter; + spin_unlock(&idle->lock); + return sprintf(buf, "%llu\n", idle_time >> 12); } static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5be981a36c3e..d649600df5b9 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -160,7 +160,7 @@ void init_cpu_timer(void) cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; - cd->cpumask = cpumask_of_cpu(cpu); + cd->cpumask = cpumask_of(cpu); cd->set_next_event = s390_next_event; cd->set_mode = s390_set_mode; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10a6ccef4412..25a6a82f1c02 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -31,9 +31,6 @@ #include <asm/sections.h> #include <asm/vdso.h> -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; @@ -71,6 +68,119 @@ static union { struct vdso_data *vdso_data = &vdso_data_store.data; /* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + unsigned int facility_list; + + facility_list = stfl(); + vd->ectg_available = switch_amode && (facility_list & 1); +} + +#ifdef CONFIG_64BIT +/* + * Setup per cpu vdso data page. + */ +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) +{ +} + +/* + * Allocate/free per cpu vdso data. + */ +#ifdef CONFIG_64BIT +#define SEGMENT_ORDER 2 +#else +#define SEGMENT_ORDER 1 +#endif + +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!switch_amode || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!switch_amode || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __vdso_init_cr5(void *dummy) +{ + unsigned long cr5; + + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} + +static void vdso_init_cr5(void) +{ + if (switch_amode && vdso_enabled) + on_each_cpu(__vdso_init_cr5, NULL, 1); +} +#endif /* CONFIG_64BIT */ + +/* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree */ @@ -172,6 +282,9 @@ static int __init vdso_init(void) { int i; + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) /* Calculate the size of the 32 bit vDSO */ vdso32_pages = ((&vdso32_end - &vdso32_start @@ -208,6 +321,10 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; +#ifndef CONFIG_SMP + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore)); +#endif + vdso_init_cr5(); #endif /* CONFIG_64BIT */ get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 488e31a3c0e7..9ce8caafdb4e 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -22,7 +22,12 @@ __kernel_clock_getres: cghi %r2,CLOCK_REALTIME je 0f cghi %r2,CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ larl %r1,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 738a410b7eb2..79dbfee831ec 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -22,8 +22,10 @@ __kernel_clock_gettime: larl %r5,_vdso_data cghi %r2,CLOCK_REALTIME je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f cghi %r2,CLOCK_MONOTONIC - jne 9f + jne 12f /* CLOCK_MONOTONIC */ ltgr %r3,%r3 @@ -42,7 +44,7 @@ __kernel_clock_gettime: alg %r0,__VDSO_WTOM_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b - larl %r5,10f + larl %r5,13f 1: clg %r1,0(%r5) jl 2f slg %r1,0(%r5) @@ -68,7 +70,7 @@ __kernel_clock_gettime: lg %r0,__VDSO_XTIME_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 5b - larl %r5,10f + larl %r5,13f 6: clg %r1,0(%r5) jl 7f slg %r1,0(%r5) @@ -79,11 +81,38 @@ __kernel_clock_gettime: 8: lghi %r2,0 br %r14 + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + sacf 0 + sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + /* Fallback to system call */ -9: lghi %r1,__NR_clock_gettime +12: lghi %r1,__NR_clock_gettime svc 0 br %r14 -10: .quad 1000000000 +13: .quad 1000000000 +14: .quad 19342813113834067 .cfi_endproc .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 75a6e62ea973..2fb36e462194 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -23,19 +23,43 @@ #include <asm/s390_ext.h> #include <asm/timer.h> #include <asm/irq_regs.h> +#include <asm/cpu.h> static ext_int_info_t ext_int_info_timer; + static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) +}; + +static inline __u64 get_vtimer(void) +{ + __u64 timer; + + asm volatile("STPT %0" : "=m" (timer)); + return timer; +} + +static inline void set_vtimer(__u64 expires) +{ + __u64 timer; + + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " SPT %1" /* Set new value immediatly afterwards */ + : "=m" (timer) : "m" (expires) ); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_process_tick(struct task_struct *tsk, int user_tick) +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick) : "=m" (S390_lowcore.last_update_timer), "=m" (S390_lowcore.last_update_clock) ); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; - - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); - - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); - - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); + + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, hardirq_offset, system, system); + + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); } } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_vtime(struct task_struct *tsk) +void account_vtime(struct task_struct *prev, struct task_struct *next) { - cputime_t cputime; - __u64 timer; - - timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - - cputime = S390_lowcore.user_timer >> 12; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(next); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + do_account_vtime(tsk, HARDIRQ_OFFSET); } /* @@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk) */ void account_system_vtime(struct task_struct *tsk) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, system; timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; + account_system_time(tsk, 0, system, system); } EXPORT_SYMBOL_GPL(account_system_vtime); -static inline void set_vtimer(__u64 expires) -{ - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; -} - -void vtime_start_cpu_timer(void) +void vtime_start_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + __u64 idle_time, expires; + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle_time = S390_lowcore.int_clock - idle->idle_enter; + account_idle_time(idle_time); + S390_lowcore.last_update_clock = S390_lowcore.int_clock; + + /* Account system time spent going idle. */ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; + + /* Restart vtime CPU timer */ + if (vq->do_spt) { + /* Program old expire value but first save progress. */ + expires = vq->idle - S390_lowcore.async_enter_timer; + expires += get_vtimer(); + set_vtimer(expires); + } else { + /* Don't account the CPU timer delta while the cpu was idle. */ + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; + } - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + spin_lock(&idle->lock); + idle->idle_time += idle_time; + idle->idle_enter = 0ULL; + idle->idle_count++; + spin_unlock(&idle->lock); } -void vtime_stop_cpu_timer(void) +void vtime_stop_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + psw_t psw; + + /* Wait for external, I/O or machine check interrupt. */ + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + + /* Check if the CPU timer needs to be reprogrammed. */ + if (vq->do_spt) { + __u64 vmax = VTIMER_MAX_SLICE; + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * set_cpu_timer(VTIMER_MAX_SLICE); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " spt 0(%5)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) + : "memory", "cc", "1"); + } else { + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "m" (psw) + : "memory", "cc", "1"); } - - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); - - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; - - vt_list->offset += vt_list->to_expire - vt_list->idle; - - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); } /* @@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) */ static void do_callbacks(struct list_head *cb_list) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); - unsigned long data; if (list_empty(cb_list)) return; - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); + list_del_init(&event->entry); + (event->function)(event->data); + if (event->interval) { + /* Recharge interval timer */ + event->expires = event->interval + vq->elapsed; + spin_lock(&vq->lock); + list_add_sorted(event, &vq->list); + spin_unlock(&vq->lock); } } } @@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list) */ static void do_cpu_timer_interrupt(__u16 error_code) { - __u64 next, delta; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; + struct list_head cb_list; /* the callback queue */ + __u64 elapsed, next; INIT_LIST_HEAD(&cb_list); - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + spin_lock(&vq->lock); + + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); + BUG_ON((s64) elapsed < 0); + vq->elapsed = 0; + list_for_each_entry_safe(event, tmp, &vq->list, entry) { + if (event->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&event->entry, &cb_list); + else + event->expires -= elapsed; } - spin_unlock(&vt_list->lock); + spin_unlock(&vq->lock); + + vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; - } - spin_unlock(&vt_list->lock); - set_vtimer(next); + next = VTIMER_MAX_SLICE; + spin_lock(&vq->lock); + if (!list_empty(&vq->list)) { + event = list_first_entry(&vq->list, struct vtimer_list, entry); + next = event->expires; + } else + vq->do_spt = 0; + spin_unlock(&vq->lock); + /* + * To improve precision add the time spent by the + * interrupt handler to the elapsed time. + * Note: CPU timer counts down and we got an interrupt, + * the current content is negative + */ + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); + set_vtimer(next - elapsed); + vq->timer = next - elapsed; + vq->elapsed = elapsed; } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); @@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer) */ static void internal_add_vtimer(struct vtimer_list *timer) { + struct vtimer_queue *vq; unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; + __u64 left, expires; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); BUG_ON(timer->cpu != smp_processor_id()); - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&vq->list)) { + /* First timer on this cpu, just program it. */ + list_add(&timer->entry, &vq->list); + set_vtimer(timer->expires); + vq->timer = timer->expires; + vq->elapsed = 0; + } else { + /* Check progress of old timers. */ + expires = timer->expires; + left = get_vtimer(); + if (likely((s64) expires < (s64) left)) { + /* The new timer expires before the current timer. */ + set_vtimer(expires); + vq->elapsed += vq->timer - left; + vq->timer = expires; + } else { + vq->elapsed += vq->timer - left; + vq->timer = left; + } + /* Insert new timer into per cpu list. */ + timer->expires += vq->elapsed; + list_add_sorted(timer, &vq->list); } - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ put_cpu(); } @@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic); * If we change a pending timer the function must be called on the CPU * where the timer is running on, e.g. by smp_call_function_single() * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. + * The original mod_timer adds the timer if it is not pending. For + * compatibility we do the same. The timer will be added on the current + * CPU as a oneshot timer. * * returns whether it has modified a pending timer (1) or not (0) */ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; unsigned long flags; int cpu; @@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) return 1; cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); + vq = &per_cpu(virt_cpu_timer, cpu); /* check if we run on the right CPU */ BUG_ON(timer->cpu != cpu); /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); + spin_lock_irqsave(&vq->lock, flags); /* if timer isn't pending add it on the current CPU */ if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* we do not activate an interval timer with mod_virt_timer */ timer->interval = 0; timer->expires = expires; @@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) timer->interval = expires; /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); internal_add_vtimer(timer); return 1; } @@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer); int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); /* we don't interrupt a running timer, just let it expire! */ list_del_init(&timer->entry); - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); + + /* initialize per cpu vtimer structure */ + vq = &__get_cpu_var(virt_cpu_timer); + INIT_LIST_HEAD(&vq->list); + spin_lock_init(&vq->lock); /* enable cpu timer interrupts */ __ctl_set_bit(0,10); - - vt_list = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - } void __init vtime_init(void) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8b00eb2ddf57..be8497186b96 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -113,8 +113,6 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { switch (ext) { - case KVM_CAP_USER_MEMORY: - return 1; default: return 0; } @@ -185,8 +183,6 @@ struct kvm *kvm_arch_create_vm(void) debug_register_view(kvm->arch.dbf, &debug_sprintf_view); VM_EVENT(kvm, 3, "%s", "vm created"); - try_module_get(THIS_MODULE); - return kvm; out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -196,13 +192,33 @@ out_nokvm: return ERR_PTR(rc); } +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); + kfree(vcpu); +} + +static void kvm_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + void kvm_arch_destroy_vm(struct kvm *kvm) { - debug_unregister(kvm->arch.dbf); + kvm_free_vcpus(kvm); kvm_free_physmem(kvm); free_page((unsigned long)(kvm->arch.sca)); + debug_unregister(kvm->arch.dbf); kfree(kvm); - module_put(THIS_MODULE); } /* Section: vcpu related */ @@ -213,8 +229,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { - /* kvm common code refers to this, but does'nt call it */ - BUG(); + /* Nothing todo */ } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -308,8 +323,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); - try_module_get(THIS_MODULE); - return vcpu; out_free_cpu: kfree(vcpu); @@ -317,14 +330,6 @@ out_nomem: return ERR_PTR(rc); } -void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); - free_page((unsigned long)(vcpu->arch.sie_block)); - kfree(vcpu); - module_put(THIS_MODULE); -} - int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { /* kvm common code refers to this, but never calls it */ diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 85b660c17eb0..c24e9c6a1736 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -31,7 +31,7 @@ enum { }; void smp_message_recv(unsigned int msg); -void smp_timer_broadcast(cpumask_t mask); +void smp_timer_broadcast(const struct cpumask *mask); void local_timer_interrupt(void); void local_timer_setup(unsigned int cpu); diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 95f0085e098a..279d9cc4a007 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -5,7 +5,6 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/sh/kernel/init_task.c b/arch/sh/kernel/init_task.c index b151a25cb14d..80c35ff71d56 100644 --- a/arch/sh/kernel/init_task.c +++ b/arch/sh/kernel/init_task.c @@ -7,7 +7,6 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct pt_regs fake_swapper_regs; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 3c5ad1660bbc..8f4027412614 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -31,12 +31,6 @@ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); - -cpumask_t cpu_online_map; -EXPORT_SYMBOL(cpu_online_map); - static inline void __init smp_store_cpu_info(unsigned int cpu) { struct sh_cpuinfo *c = cpu_data + cpu; @@ -190,11 +184,11 @@ void arch_send_call_function_single_ipi(int cpu) plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); } -void smp_timer_broadcast(cpumask_t mask) +void smp_timer_broadcast(const struct cpumask *mask) { int cpu; - for_each_cpu_mask(cpu, mask) + for_each_cpu(cpu, mask) plat_send_ipi(cpu, SMP_MSG_TIMER); } diff --git a/arch/sh/kernel/timers/timer-broadcast.c b/arch/sh/kernel/timers/timer-broadcast.c index c2317635230f..96e8eaea1e62 100644 --- a/arch/sh/kernel/timers/timer-broadcast.c +++ b/arch/sh/kernel/timers/timer-broadcast.c @@ -51,7 +51,7 @@ void __cpuinit local_timer_setup(unsigned int cpu) clk->mult = 1; clk->set_mode = dummy_timer_set_mode; clk->broadcast = smp_timer_broadcast; - clk->cpumask = cpumask_of_cpu(cpu); + clk->cpumask = cpumask_of(cpu); clockevents_register_device(clk); } diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index 3c61ddd4d43e..0db3f9510336 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -263,7 +263,7 @@ static int tmu_timer_init(void) tmu0_clockevent.min_delta_ns = clockevent_delta2ns(1, &tmu0_clockevent); - tmu0_clockevent.cpumask = cpumask_of_cpu(0); + tmu0_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&tmu0_clockevent); diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index a8180e546a48..8408d9d2a662 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -29,8 +29,6 @@ */ extern unsigned char boot_cpu_id; -extern cpumask_t phys_cpu_present_map; -#define cpu_possible_map phys_cpu_present_map typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); diff --git a/arch/sparc/kernel/init_task.c b/arch/sparc/kernel/init_task.c index 62126e4cec54..f28cb8278e98 100644 --- a/arch/sparc/kernel/init_task.c +++ b/arch/sparc/kernel/init_task.c @@ -8,7 +8,6 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index a3ea2bcb95de..cab8e0286871 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -312,7 +312,8 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4u_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { sun4u_irq_enable(virt_irq); } @@ -362,7 +363,8 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; unsigned long cpuid = irq_choose_cpu(virt_irq); @@ -429,7 +431,8 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) +static void sun4v_virt_set_affinity(unsigned int virt_irq, + const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; int err; @@ -851,7 +854,7 @@ void fixup_irqs(void) !(irq_desc[irq].status & IRQ_PER_CPU)) { if (irq_desc[irq].chip->set_affinity) irq_desc[irq].chip->set_affinity(irq, - irq_desc[irq].affinity); + &irq_desc[irq].affinity); } spin_unlock_irqrestore(&irq_desc[irq].lock, flags); } diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index 46e231f7c5ce..322046cdf85f 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -780,7 +780,7 @@ out: if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } return irq; diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index 2e680f34f727..0d0cd815e83e 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -288,7 +288,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, if (nid != -1) { cpumask_t numa_mask = node_to_cpumask(nid); - irq_set_affinity(irq, numa_mask); + irq_set_affinity(irq, &numa_mask); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index e396c1f17a92..1e5ac4e282e1 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -39,8 +39,6 @@ volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; unsigned char boot_cpu_id = 0; unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t phys_cpu_present_map = CPU_MASK_NONE; cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* The only guaranteed locking primitive available on all Sparc @@ -334,7 +332,7 @@ void __init smp_setup_cpu_possible_map(void) instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid < NR_CPUS) { - cpu_set(mid, phys_cpu_present_map); + cpu_set(mid, cpu_possible_map); cpu_set(mid, cpu_present_map); } instance++; @@ -354,7 +352,7 @@ void __init smp_prepare_boot_cpu(void) current_thread_info()->cpu = cpuid; cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, phys_cpu_present_map); + cpu_set(cpuid, cpu_possible_map); } int __cpuinit __cpu_up(unsigned int cpu) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index bfe99d82d458..46329799f346 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -49,14 +49,10 @@ int sparc64_multi_core __read_mostly; -cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; -cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; -EXPORT_SYMBOL(cpu_possible_map); -EXPORT_SYMBOL(cpu_online_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c index a4d45fc29b21..e1e97639231b 100644 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@ -112,10 +112,6 @@ EXPORT_PER_CPU_SYMBOL(__cpu_data); #ifdef CONFIG_SMP /* IRQ implementation. */ EXPORT_SYMBOL(synchronize_irq); - -/* CPU online map and active count. */ -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(phys_cpu_present_map); #endif EXPORT_SYMBOL(__udelay); diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 141da3759091..9df8f095a8b1 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -763,7 +763,7 @@ void __devinit setup_sparc64_timer(void) sevt = &__get_cpu_var(sparc64_events); memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of_cpu(smp_processor_id()); + sevt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(sevt); } diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c index 910eda8fca18..806d381947bf 100644 --- a/arch/um/kernel/init_task.c +++ b/arch/um/kernel/init_task.c @@ -10,7 +10,6 @@ #include "linux/mqueue.h" #include "asm/uaccess.h" -static struct fs_struct init_fs = INIT_FS; struct mm_struct init_mm = INIT_MM(init_mm); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index 045772142844..98351c78bc81 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -25,13 +25,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); #include "irq_user.h" #include "os.h" -/* CPU online map, set by smp_boot_cpus */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map = CPU_MASK_NONE; - -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_possible_map); - /* Per CPU bogomips and other parameters * The only piece used here is the ipi pipe, which is set before SMP is * started and never changed. diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 47f04f4a3464..b13a87a3ec95 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -50,7 +50,7 @@ static int itimer_next_event(unsigned long delta, static struct clock_event_device itimer_clockevent = { .name = "itimer", .rating = 250, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = itimer_set_mode, .set_next_event = itimer_next_event, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0f44add3e0b7..249d1e0824b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -601,19 +601,20 @@ config IOMMU_HELPER config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && BROKEN + depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + select CPUMASK_OFFSTACK default n help Configure maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. config NR_CPUS - int "Maximum number of CPUs (2-512)" if !MAXSMP - range 2 512 - depends on SMP + int "Maximum number of CPUs" if SMP && !MAXSMP + range 2 512 if SMP && !MAXSMP + default "1" if !SMP default "4096" if MAXSMP - default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 - default "8" + default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) + default "8" if SMP help This allows you to specify the maximum number of CPUs which this kernel will support. The maximum supported value is 512 and the diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index b195f85526e3..9dabd00e9805 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -24,15 +24,14 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/i387.h> -#include <asm/ia32.h> #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include <asm/user32.h> #include <asm/sigcontext32.h> #include <asm/proto.h> #include <asm/vdso.h> - #include <asm/sigframe.h> +#include <asm/sys_ia32.h> #define DEBUG_SIG 0 diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index d21991ce606c..29cdcd02ead3 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c @@ -8,6 +8,7 @@ #include <linux/shm.h> #include <linux/ipc.h> #include <linux/compat.h> +#include <asm/sys_ia32.h> asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 2e09dcd3c0a6..6c0d7f6231af 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -44,8 +44,8 @@ #include <asm/types.h> #include <asm/uaccess.h> #include <asm/atomic.h> -#include <asm/ia32.h> #include <asm/vgtod.h> +#include <asm/sys_ia32.h> #define AA(__x) ((unsigned long)(__x)) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 25caa0738af5..ab1d51a8855e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -54,7 +54,6 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); -extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); @@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic, x2apic_preenabled; +extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index ce547f24a1cd..d8dd9f537911 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -9,12 +9,12 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return &cpu_online_map; #else - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); #endif } @@ -79,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS) + if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); return BAD_APICID; @@ -94,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= NR_CPUS) + if (cpu >= nr_cpu_ids) return BAD_APICID; return cpu_physical_id(cpu); } @@ -119,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) } /* As we are using single CPU as destination, pick only one CPU here */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int cpu; int apicid; - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return cpu_to_logical_apicid(cpu); + + return BAD_APICID; +} + static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) { return cpuid_apic >> index_msb; diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 9404c535b7ec..27fcd01b3ae6 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h @@ -1,25 +1,22 @@ #ifndef __ASM_MACH_IPI_H #define __ASM_MACH_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_MACH_IPI_H */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b072..dc27705f5443 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); } -#define SYS_VECTOR_FREE 0 -#define SYS_VECTOR_ALLOCED 1 - extern int first_system_vector; -extern char system_vectors[]; +/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ +extern unsigned long used_vectors[]; static inline void alloc_system_vector(int vector) { - if (system_vectors[vector] == SYS_VECTOR_FREE) { - system_vectors[vector] = SYS_VECTOR_ALLOCED; + if (!test_bit(vector, used_vectors)) { + set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; } else diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a2e545c91c35..ca5ffb2856b6 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ +extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index e24ef876915f..51ac1230294e 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -9,14 +9,14 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus_cluster(void) +static inline const cpumask_t *target_cpus_cluster(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return cpumask_of_cpu(smp_processor_id()); + return &cpumask_of_cpu(smp_processor_id()); } #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) @@ -80,9 +80,10 @@ extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? - "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); + "Physical Cluster" : "Logical Cluster", + nr_ioapics, cpus_addr(*target_cpus())[0]); } static inline int multi_timer_check(int apic, int irq) @@ -100,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) { if (!mps_cpu) return boot_cpu_physical_apicid; - else if (mps_cpu < NR_CPUS) + else if (mps_cpu < nr_cpu_ids) return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -120,9 +121,9 @@ extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; - return (int)cpu_2_logical_apicid[cpu]; + if (cpu >= nr_cpu_ids) + return BAD_APICID; + return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); #endif @@ -146,14 +147,15 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) +static inline unsigned int +cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpumask_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return 0xFF; @@ -161,10 +163,10 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -179,14 +181,14 @@ static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) return apicid; } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return cpu_to_logical_apicid(0); @@ -194,10 +196,52 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, *cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); + return cpu_to_logical_apicid(0); + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid = cpu_to_logical_apicid(0); + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return apicid; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) + goto exit; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = cpumask_first(cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpumask_test_cpu(cpu, cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -209,6 +253,8 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) } cpu++; } +exit: + free_cpumask_var(cpumask); return apicid; } diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 632a955fcc0a..7e8ed24d4b8a 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h @@ -1,24 +1,22 @@ #ifndef __ASM_ES7000_IPI_H #define __ASM_ES7000_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_ES7000_IPI_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 0ac17d33a8c7..746f37a7963a 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -24,7 +24,7 @@ struct genapic { int (*probe)(void); int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); + const struct cpumask *(*target_cpus)(void); int int_delivery_mode; int int_dest_mode; int ESR_DISABLE; @@ -57,12 +57,16 @@ struct genapic { unsigned (*get_apic_id)(unsigned long x); unsigned long apic_id_mask; - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); - cpumask_t (*vector_allocation_domain)(int cpu); + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); #ifdef CONFIG_SMP /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif @@ -114,6 +118,7 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ + APICFUNC(cpu_mask_to_apicid_and) \ APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 2cae011668b7..adf32fb56aa6 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_GENAPIC_64_H #define _ASM_X86_GENAPIC_64_H +#include <linux/cpumask.h> + /* * Copyright 2004 James Cleverdon, IBM. * Subject to the GNU Public License, v.2 @@ -18,16 +20,20 @@ struct genapic { u32 int_delivery_mode; u32 int_dest_mode; int (*apic_id_registered)(void); - cpumask_t (*target_cpus)(void); - cpumask_t (*vector_allocation_domain)(int cpu); + const struct cpumask *(*target_cpus)(void); + void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); /* ipi */ - void (*send_IPI_mask)(cpumask_t mask, int vector); + void (*send_IPI_mask)(const struct cpumask *mask, int vector); + void (*send_IPI_mask_allbutself)(const struct cpumask *mask, + int vector); void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); void (*send_IPI_self)(int vector); /* */ - unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); + unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); + unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, + const struct cpumask *andmask); unsigned int (*phys_pkg_id)(int index_msb); unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa9..c745a306f7d3 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h @@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, native_apic_mem_write(APIC_ICR, cfg); } -static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) +static inline void send_IPI_mask_sequence(const struct cpumask *mask, + int vector) { unsigned long flags; unsigned long query_cpu; @@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask_nr(query_cpu, mask) { + for_each_cpu(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } +static inline void send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) + __send_IPI_dest_field( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); +} + #endif /* _ASM_X86_IPI_H */ diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 28e409fc73f3..592688ed04d3 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -33,7 +33,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> -extern void fixup_irqs(cpumask_t map); +extern void fixup_irqs(void); #endif extern unsigned int do_IRQ(struct pt_regs *regs); @@ -42,5 +42,6 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +extern int vector_used_by_percpu_irq(unsigned int vector); #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa1..97215a458e5f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -21,6 +21,7 @@ #include <asm/pvclock-abi.h> #include <asm/desc.h> +#include <asm/mtrr.h> #define KVM_MAX_VCPUS 16 #define KVM_MEMORY_SLOTS 32 @@ -86,6 +87,7 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 extern spinlock_t kvm_lock; @@ -180,6 +182,8 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head oos_link; + /* * The following two entries are used to key the shadow page in the * hash table. @@ -190,13 +194,16 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - unsigned long slot_bitmap; /* One bit set per slot which has memory - * in this shadow page. - */ + /* + * One bit set per slot which has memory + * in this shadow page. + */ + DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool unsync_children; + bool global; + unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ @@ -327,8 +334,10 @@ struct kvm_vcpu_arch { bool nmi_pending; bool nmi_injected; + bool nmi_window_open; - u64 mtrr[0x100]; + struct mtrr_state_type mtrr_state; + u32 pat; }; struct kvm_mem_alias { @@ -350,11 +359,13 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; + struct list_head oos_global_pages; struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; + int vapics_in_nmi_mode; int round_robin_prev_vcpu; unsigned int tss_addr; @@ -378,6 +389,7 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; + u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -397,6 +409,7 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; + u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -405,6 +418,7 @@ struct kvm_vcpu_stat { u32 insn_emulation_fail; u32 hypercalls; u32 irq_injections; + u32 nmi_injections; }; struct descriptor_table { @@ -477,6 +491,7 @@ struct kvm_x86_ops { int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); + int (*get_mt_mask_shift)(void); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector); void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes); + const u8 *new, int bytes, + bool guest_initiated); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -607,6 +624,8 @@ void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); + static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) { struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); @@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - #define MSR_IA32_TIME_STAMP_COUNTER 0x010 #define TSS_IOPB_BASE_OFFSET 0x66 diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f208..6a159732881a 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -123,6 +123,7 @@ struct decode_cache { u8 ad_bytes; u8 rex_prefix; struct operand src; + struct operand src2; struct operand dst; bool has_seg_override; u8 seg_override; @@ -146,22 +147,18 @@ struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; - /* Linear faulting address (if emulating a page-faulting instruction) */ unsigned long eflags; - /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - u32 cs_base; /* decode cache */ - struct decode_cache decode; }; /* Repeat String Operation Prefix */ -#define REPE_PREFIX 1 -#define REPNE_PREFIX 2 +#define REPE_PREFIX 1 +#define REPNE_PREFIX 2 /* Execution mode, passed to the emulator. */ #define X86EMUL_MODE_REAL 0 /* Real mode. */ @@ -170,7 +167,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ /* Host execution mode. */ -#if defined(__i386__) +#if defined(CONFIG_X86_32) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 #elif defined(CONFIG_X86_64) #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index 6cb3a467e067..cc09cbbee27e 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -8,12 +8,12 @@ #define APIC_DFR_VALUE (APIC_DFR_FLAT) -static inline cpumask_t target_cpus(void) +static inline const struct cpumask *target_cpus(void) { #ifdef CONFIG_SMP - return cpu_online_map; + return cpu_online_mask; #else - return cpumask_of_cpu(0); + return cpumask_of(0); #endif } @@ -28,6 +28,7 @@ static inline cpumask_t target_cpus(void) #define apic_id_registered (genapic->apic_id_registered) #define init_apic_ldr (genapic->init_apic_ldr) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define phys_pkg_id (genapic->phys_pkg_id) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) @@ -61,9 +62,19 @@ static inline int apic_id_registered(void) return physid_isset(read_apic_id(), phys_cpu_present_map); } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) { - return cpus_addr(cpumask)[0]; + return cpumask_bits(cpumask)[0]; +} + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + unsigned long mask1 = cpumask_bits(cpumask)[0]; + unsigned long mask2 = cpumask_bits(andmask)[0]; + unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; + + return (unsigned int)(mask1 & mask2 & mask3); } static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) @@ -88,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid) #endif } -static inline cpumask_t vector_allocation_domain(int cpu) +static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -98,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; } #endif @@ -131,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) + if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index fabca01ebacf..191312d155da 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h @@ -4,7 +4,8 @@ /* Avoid include hell */ #define NMI_VECTOR 0x02 -void send_IPI_mask_bitmask(cpumask_t mask, int vector); +void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); void __send_IPI_shortcut(unsigned int shortcut, int vector); extern int no_broadcast; @@ -12,28 +13,27 @@ extern int no_broadcast; #ifdef CONFIG_X86_64 #include <asm/genapic.h> #define send_IPI_mask (genapic->send_IPI_mask) +#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) #else -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_bitmask(mask, vector); } +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); #endif static inline void __local_send_IPI_allbutself(int vector) { - if (no_broadcast || vector == NMI_VECTOR) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); - send_IPI_mask(mask, vector); - } else + if (no_broadcast || vector == NMI_VECTOR) + send_IPI_mask_allbutself(cpu_online_mask, vector); + else __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void __local_send_IPI_all(int vector) { if (no_broadcast || vector == NMI_VECTOR) - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector); } diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index e430f47df667..48553e958ad5 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -24,6 +24,7 @@ #define check_phys_apicid_present (genapic->check_phys_apicid_present) #define check_apicid_used (genapic->check_apicid_used) #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 91885c28f66b..62d14ce3cd00 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -6,13 +6,13 @@ #include <asm/mpspec_def.h> extern int apic_version[MAX_APICS]; +extern int pic_mode; #ifdef CONFIG_X86_32 #include <mach_mpspec.h> extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; -extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31e..cb988aab716d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -57,6 +57,31 @@ struct mtrr_gentry { }; #endif /* !__i386__ */ +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) + /* These are the various ioctls */ #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 0bf2a06b7a4e..c80f00d29965 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -7,9 +7,9 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { - return CPU_MASK_ALL; + return &CPU_MASK_ALL; } #define NO_BALANCE_IRQ (1) @@ -122,7 +122,13 @@ static inline void enable_apic_mode(void) * We use physical apicids here, not logical, so just return the default * physical broadcast to stop people from breaking us */ -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +{ + return (int) 0xF; +} + +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { return (int) 0xF; } diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index 935588d286cf..a8374c652778 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h @@ -1,25 +1,22 @@ #ifndef __ASM_NUMAQ_IPI_H #define __ASM_NUMAQ_IPI_H -void send_IPI_mask_sequence(cpumask_t, int vector); +void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const struct cpumask *mask, int vector) { send_IPI_mask_sequence(mask, vector); } static inline void send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - - if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask_allbutself(cpu_online_mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(cpu_online_mask, vector); } #endif /* __ASM_NUMAQ_IPI_H */ diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h index 1959018aac02..e60fd3e14bdf 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/include/asm/pci_x86.h @@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops; struct irq_info { u8 bus, devfn; /* Bus, device and function */ struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ u16 bitmap; /* Available IRQs */ } __attribute__((packed)) irq[4]; u8 slot; /* Slot number, 0=onboard */ @@ -69,11 +70,13 @@ struct irq_routing_table { u16 version; /* PIRQ_VERSION */ u16 size; /* Table size in bytes */ u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ u32 miniport_data; /* Crap */ u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ + u8 checksum; /* Modulo 256 checksum must give 0 */ struct irq_info slots[0]; } __attribute__((packed)); @@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos) static inline void mmio_config_writeb(void __iomem *pos, u8 val) { - asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writew(void __iomem *pos, u16 val) { - asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); } static inline void mmio_config_writel(void __iomem *pos, u32 val) { - asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); } diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d9..830b9fcb6427 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -60,7 +60,7 @@ struct smp_ops { void (*cpu_die)(unsigned int cpu); void (*play_dead)(void); - void (*send_call_func_ipi)(cpumask_t mask); + void (*send_call_func_ipi)(const struct cpumask *mask); void (*send_call_func_single_ipi)(int cpu); }; @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) static inline void arch_send_call_function_ipi(cpumask_t mask) { - smp_ops.send_call_func_ipi(mask); + smp_ops.send_call_func_ipi(&mask); } void cpu_disable_common(void); @@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); -void native_send_call_func_ipi(cpumask_t mask); +void native_send_call_func_ipi(const struct cpumask *mask); void native_send_call_func_single_ipi(int cpu); extern void prefill_possible_map(void); diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9b3070f1c2ac..99327d1be49f 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -14,13 +14,13 @@ #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -static inline cpumask_t target_cpus(void) +static inline const cpumask_t *target_cpus(void) { /* CPU_MASK_ALL (0xff) has undefined behaviour with * dest_LowestPrio mode logical clustered apic interrupt routing * Just start on cpu 0. IRQ balancing will spread load */ - return cpumask_of_cpu(0); + return &cpumask_of_cpu(0); } #define INT_DELIVERY_MODE (dest_LowestPrio) @@ -137,14 +137,14 @@ static inline void enable_apic_mode(void) { } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) { int num_bits_set; int cpus_found = 0; int cpu; int apicid; - num_bits_set = cpus_weight(cpumask); + num_bits_set = cpus_weight(*cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) return (int) 0xFF; @@ -152,10 +152,10 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. */ - cpu = first_cpu(cpumask); + cpu = first_cpu(*cpumask); apicid = cpu_to_logical_apicid(cpu); while (cpus_found < num_bits_set) { - if (cpu_isset(cpu, cpumask)) { + if (cpu_isset(cpu, *cpumask)) { int new_apicid = cpu_to_logical_apicid(cpu); if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ @@ -170,6 +170,49 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) return apicid; } +static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, + const struct cpumask *andmask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid = 0xFF; + cpumask_var_t cpumask; + + if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) + return (int) 0xFF; + + cpumask_and(cpumask, inmask, andmask); + cpumask_and(cpumask, cpumask, cpu_online_mask); + + num_bits_set = cpumask_weight(cpumask); + /* Return id to all */ + if (num_bits_set == nr_cpu_ids) + goto exit; + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = cpumask_first(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpumask_test_cpu(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); + return 0xFF; + } + apicid = apicid | new_apicid; + cpus_found++; + } + cpu++; + } +exit: + free_cpumask_var(cpumask); + return apicid; +} + /* cpuid returns the value latched in the HW at reset, not the APIC ID * register's value. For any box whose BIOS changes APIC IDs, like * clustered APIC systems, we must use hard_smp_processor_id. diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index 53bd1e7bd7b4..a8a2c24f50cc 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h @@ -1,9 +1,10 @@ #ifndef __ASM_SUMMIT_IPI_H #define __ASM_SUMMIT_IPI_H -void send_IPI_mask_sequence(cpumask_t mask, int vector); +void send_IPI_mask_sequence(const cpumask_t *mask, int vector); +void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); -static inline void send_IPI_mask(cpumask_t mask, int vector) +static inline void send_IPI_mask(const cpumask_t *mask, int vector) { send_IPI_mask_sequence(mask, vector); } @@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) cpu_clear(smp_processor_id(), mask); if (!cpus_empty(mask)) - send_IPI_mask(mask, vector); + send_IPI_mask(&mask, vector); } static inline void send_IPI_all(int vector) { - send_IPI_mask(cpu_online_map, vector); + send_IPI_mask(&cpu_online_map, vector); } #endif /* __ASM_SUMMIT_IPI_H */ diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e869..1b8afa78e869 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/include/asm/svm.h diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h new file mode 100644 index 000000000000..ffb08be2a530 --- /dev/null +++ b/arch/x86/include/asm/sys_ia32.h @@ -0,0 +1,101 @@ +/* + * sys_ia32.h - Linux ia32 syscall interfaces + * + * Copyright (c) 2008 Jaswinder Singh Rajput + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYS_IA32_H +#define _ASM_X86_SYS_IA32_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/compat.h> +#include <asm/ia32.h> + +/* ia32/sys_ia32.c */ +asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); + +asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long sys32_fstatat(unsigned int, char __user *, + struct stat64 __user *, int); +struct mmap_arg_struct; +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); +asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); + +asmlinkage long sys32_pipe(int __user *); +struct sigaction32; +struct old_sigaction32; +asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, + struct sigaction32 __user *, unsigned int); +asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, + struct old_sigaction32 __user *); +asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, + compat_sigset_t __user *, unsigned int); +asmlinkage long sys32_alarm(unsigned int); + +struct sel_arg_struct; +asmlinkage long sys32_old_select(struct sel_arg_struct __user *); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_sysfs(int, u32, u32); + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, + struct compat_timespec __user *); +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); +asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); + +#ifdef CONFIG_SYSCTL_SYSCALL +struct sysctl_ia32; +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); +#endif + +asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); + +asmlinkage long sys32_personality(unsigned long); +asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); + +asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + +struct oldold_utsname; +struct old_utsname; +asmlinkage long sys32_olduname(struct oldold_utsname __user *); +long sys32_uname(struct old_utsname __user *); + +long sys32_ustat(unsigned, struct ustat32 __user *); + +asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, + compat_uptr_t __user *, struct pt_regs *); +asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); + +long sys32_lseek(unsigned int, int, unsigned int); +long sys32_kill(int, int); +long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); +long sys32_vm86_warning(void); +long sys32_lookup_dcookie(u32, u32, char __user *, size_t); + +asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); +asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, + unsigned, unsigned, int); +asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); +asmlinkage long sys32_fallocate(int, int, unsigned, + unsigned, unsigned, unsigned); + +/* ia32/ia32_signal.c */ +asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, + stack_ia32_t __user *, struct pt_regs *); +asmlinkage long sys32_sigreturn(struct pt_regs *); +asmlinkage long sys32_rt_sigreturn(struct pt_regs *); + +/* ia32/ipc32.c */ +asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); +#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed7..79e31e9dcdda 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -226,6 +226,8 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) /* indicates that pointers to the topology cpumask_t maps are valid */ #define arch_provides_topology_pointers yes diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e2363253bbbf..50423c7b56b2 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - int dest_subnodeid:6; /* must be zero */ + unsigned int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ - /* bits 20:6 */ - int command:8; /* message type */ + unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ + /* bits 20:6 */ /* first bit in node_map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - int rsvd_1:3; /* must be zero */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - int rsvd_2:9; /* must be zero */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - int payload_2a:8; /* becomes byte 16 of msg */ + unsigned int payload_2a:8;/* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - int payload_2b:8; /* becomes byte 17 of msg */ + unsigned int payload_2b:8;/* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - int rsvd_3:1; /* must be zero */ + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - int replied_to:1; /* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ /* bit 58 */ - int payload_1a:5; /* not currently used */ + unsigned int payload_1a:5;/* not currently used */ /* bits 63:59 */ - int payload_1b:8; /* not currently used */ + unsigned int payload_1b:8;/* not currently used */ /* bits 71:64 */ - int payload_1c:8; /* not currently used */ + unsigned int payload_1c:8;/* not currently used */ /* bits 79:72 */ - int payload_1d:2; /* not currently used */ + unsigned int payload_1d:2;/* not currently used */ /* bits 81:80 */ - int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - int sw_ack_flag:1; /* software acknowledge flag */ + unsigned int sw_ack_flag:1;/* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - int rsvd_5:6; /* must be zero */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - int int_both:1; /* if 1, interrupt both sockets on the blade */ + unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ /* bit 101*/ - int fairness:3; /* usually zero */ + unsigned int fairness:3;/* usually zero */ /* bits 104:102 */ - int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - int chaining:1; /* next descriptor is part of this activation*/ + unsigned int chaining:1;/* next descriptor is part of this activation*/ /* bit 106 */ - int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 000000000000..593636275238 --- /dev/null +++ b/arch/x86/include/asm/virtext.h @@ -0,0 +1,132 @@ +/* CPU virtualization extensions handling + * + * This should carry the code for handling CPU virtualization extensions + * that needs to live in the kernel core. + * + * Author: Eduardo Habkost <ehabkost@redhat.com> + * + * Copyright (C) 2008, Red Hat Inc. + * + * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#ifndef _ASM_X86_VIRTEX_H +#define _ASM_X86_VIRTEX_H + +#include <asm/processor.h> +#include <asm/system.h> + +#include <asm/vmx.h> +#include <asm/svm.h> + +/* + * VMX functions: + */ + +static inline int cpu_has_vmx(void) +{ + unsigned long ecx = cpuid_ecx(1); + return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ +} + + +/** Disable VMX on the current CPU + * + * vmxoff causes a undefined-opcode exception if vmxon was not run + * on the CPU previously. Only call this function if you know VMX + * is enabled. + */ +static inline void cpu_vmxoff(void) +{ + asm volatile (ASM_VMX_VMXOFF : : : "cc"); + write_cr4(read_cr4() & ~X86_CR4_VMXE); +} + +static inline int cpu_vmx_enabled(void) +{ + return read_cr4() & X86_CR4_VMXE; +} + +/** Disable VMX if it is enabled on the current CPU + * + * You shouldn't call this if cpu_has_vmx() returns 0. + */ +static inline void __cpu_emergency_vmxoff(void) +{ + if (cpu_vmx_enabled()) + cpu_vmxoff(); +} + +/** Disable VMX if it is supported and enabled on the current CPU + */ +static inline void cpu_emergency_vmxoff(void) +{ + if (cpu_has_vmx()) + __cpu_emergency_vmxoff(); +} + + + + +/* + * SVM functions: + */ + +/** Check if the CPU has SVM support + * + * You can use the 'msg' arg to get a message describing the problem, + * if the function returns zero. Simply pass NULL if you are not interested + * on the messages; gcc should take care of not generating code for + * the messages on this case. + */ +static inline int cpu_has_svm(const char **msg) +{ + uint32_t eax, ebx, ecx, edx; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + if (msg) + *msg = "not amd"; + return 0; + } + + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if (eax < SVM_CPUID_FUNC) { + if (msg) + *msg = "can't execute cpuid_8000000a"; + return 0; + } + + cpuid(0x80000001, &eax, &ebx, &ecx, &edx); + if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { + if (msg) + *msg = "svm not available"; + return 0; + } + return 1; +} + + +/** Disable SVM on the current CPU + * + * You should call this only if cpu_has_svm() returned true. + */ +static inline void cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); +} + +/** Makes sure SVM is disabled, if it is supported on the CPU + */ +static inline void cpu_emergency_svm_disable(void) +{ + if (cpu_has_svm(NULL)) + cpu_svm_disable(); +} + +#endif /* _ASM_X86_VIRTEX_H */ diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h index ec5edc339da6..d0238e6151d8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -63,10 +63,13 @@ #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 /* VMCS Encodings */ enum vmcs_field { @@ -112,6 +115,8 @@ enum vmcs_field { VMCS_LINK_POINTER_HIGH = 0x00002801, GUEST_IA32_DEBUGCTL = 0x00002802, GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, GUEST_PDPTR0 = 0x0000280a, GUEST_PDPTR0_HIGH = 0x0000280b, GUEST_PDPTR1 = 0x0000280c, @@ -120,6 +125,8 @@ enum vmcs_field { GUEST_PDPTR2_HIGH = 0x0000280f, GUEST_PDPTR3 = 0x00002810, GUEST_PDPTR3_HIGH = 0x00002811, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, PIN_BASED_VM_EXEC_CONTROL = 0x00004000, CPU_BASED_VM_EXEC_CONTROL = 0x00004002, EXCEPTION_BITMAP = 0x00004004, @@ -331,8 +338,9 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 +#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 @@ -356,4 +364,19 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul + +#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" +#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" +#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" +#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" +#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" +#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" +#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" +#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" +#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" +#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" + + + #endif diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2e2da717b350..658e29e0f49b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. */ -void prealloc_protection_domains(void) +static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c625800c55ca..fb85e8d466cc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -void __init iommu_enable(struct amd_iommu *iommu) +static void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " "at %02x:%02x.%x cap 0x%hx\n", @@ -256,7 +256,7 @@ void __init iommu_enable(struct amd_iommu *iommu) } /* Function to enable IOMMU event logging and event interrupts */ -void __init iommu_enable_event_logging(struct amd_iommu *iommu) +static void __init iommu_enable_event_logging(struct amd_iommu *iommu) { iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index b5229affb953..d652515e2855 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer); #ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; +static int x2apic_preenabled; +static int disable_x2apic; static __init int setup_nox2apic(char *str) { disable_x2apic = 1; @@ -119,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); int first_system_vector = 0xfe; -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - /* * Debug level, exported for io_apic.c */ @@ -142,7 +140,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); +static void lapic_timer_broadcast(const cpumask_t *mask); static void apic_pm_activate(void); /* @@ -228,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id) apic_write(APIC_ICR, low); } -u64 xapic_icr_read(void) +static u64 xapic_icr_read(void) { u32 icr1, icr2; @@ -268,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id) wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } -u64 x2apic_icr_read(void) +static u64 x2apic_icr_read(void) { unsigned long val; @@ -455,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(cpumask_t mask) +static void lapic_timer_broadcast(const cpumask_t *mask) { #ifdef CONFIG_SMP send_IPI_mask(mask, LOCAL_TIMER_VECTOR); @@ -471,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void) struct clock_event_device *levt = &__get_cpu_var(lapic_events); memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); + levt->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(levt); } @@ -1807,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup) void __cpuinit generic_processor_info(int apicid, int version) { int cpu; - cpumask_t tmp_map; /* * Validate version */ if (version == 0x0) { pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); + "fixing up to 0x10. (tell your hw vendor)\n", + version); version = 0x10; } apic_version[apicid] = version; - if (num_processors >= NR_CPUS) { - pr_warning("WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); + if (num_processors >= nr_cpu_ids) { + int max = nr_cpu_ids; + int thiscpu = max + disabled_cpus; + + pr_warning( + "ACPI: NR_CPUS/possible_cpus limit of %i reached." + " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; return; } num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); + cpu = cpumask_next_zero(-1, cpu_present_mask); physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { @@ -1878,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version) } #endif - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); } #ifdef CONFIG_X86_64 @@ -2081,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void) bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { /* are we being called early in kernel startup? */ if (bios_cpu_apicid) { id = bios_cpu_apicid[i]; diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 2a0a2a3cac26..f63882728d91 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,7 +25,7 @@ #include <asm/uv/bios.h> #include <asm/uv/uv_hub.h> -struct uv_systab uv_systab; +static struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 68b5d8681cbb..c6ecda64f5f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static int __cpuinit detect_cache_attributes(unsigned int cpu) +static void get_cpu_leaves(void *_retval) { - struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; - cpumask_t oldmask; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) - return -ENOMEM; - - oldmask = current->cpus_allowed; - retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - if (retval) - goto out; + int j, *retval = _retval, cpu = smp_processor_id(); /* Do cpuid and store the results */ for (j = 0; j < num_cache_leaves; j++) { + struct _cpuid4_info *this_leaf; this_leaf = CPUID4_INFO_IDX(cpu, j); - retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) { + *retval = cpuid4_cache_lookup(j, this_leaf); + if (unlikely(*retval < 0)) { int i; for (i = 0; i < j; i++) @@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) } cache_shared_cpu_map_setup(cpu, j); } - set_cpus_allowed_ptr(current, &oldmask); +} + +static int __cpuinit detect_cache_attributes(unsigned int cpu) +{ + int retval; + + if (num_cache_leaves == 0) + return -ENOENT; + + per_cpu(cpuid4_info, cpu) = kzalloc( + sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); + if (per_cpu(cpuid4_info, cpu) == NULL) + return -ENOMEM; -out: + smp_call_function_single(cpu, get_cpu_leaves, &retval, true); if (retval) { kfree(per_cpu(cpuid4_info, cpu)); per_cpu(cpuid4_info, cpu) = NULL; @@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, cpumask_t *mask = &this_leaf->shared_cpu_map; n = type? - cpulist_scnprintf(buf, len-2, *mask): - cpumask_scnprintf(buf, len-2, *mask); + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 748c8f9e7a05..a5a5e0530370 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ * CPU Initialization */ +struct thresh_restart { + struct threshold_block *b; + int reset; + u16 old_limit; +}; + /* must be called with correct cpu affinity */ -static void threshold_restart_bank(struct threshold_block *b, - int reset, u16 old_limit) +static long threshold_restart_bank(void *_tr) { + struct thresh_restart *tr = _tr; u32 mci_misc_hi, mci_misc_lo; - rdmsr(b->address, mci_misc_lo, mci_misc_hi); + rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); - if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) - reset = 1; /* limit cannot be lower than err count */ + if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) + tr->reset = 1; /* limit cannot be lower than err count */ - if (reset) { /* reset err count and overflow bit */ + if (tr->reset) { /* reset err count and overflow bit */ mci_misc_hi = (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | - (THRESHOLD_MAX - b->threshold_limit); - } else if (old_limit) { /* change limit w/o reset */ + (THRESHOLD_MAX - tr->b->threshold_limit); + } else if (tr->old_limit) { /* change limit w/o reset */ int new_count = (mci_misc_hi & THRESHOLD_MAX) + - (old_limit - b->threshold_limit); + (tr->old_limit - tr->b->threshold_limit); mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | (new_count & THRESHOLD_MAX); } - b->interrupt_enable ? + tr->b->interrupt_enable ? (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : (mci_misc_hi &= ~MASK_INT_TYPE_HI); mci_misc_hi |= MASK_COUNT_EN_HI; - wrmsr(b->address, mci_misc_lo, mci_misc_hi); + wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); + return 0; } /* cpu init entry point, called from mce.c with preempt off */ @@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u8 lvt_off; u32 low = 0, high = 0, address = 0; + struct thresh_restart tr; for (bank = 0; bank < NR_BANKS; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) wrmsr(address, low, high); threshold_defaults.address = address; - threshold_restart_bank(&threshold_defaults, 0, 0); + tr.b = &threshold_defaults; + tr.reset = 0; + tr.old_limit = 0; + threshold_restart_bank(&tr); } } } @@ -251,20 +262,6 @@ struct threshold_attr { ssize_t(*store) (struct threshold_block *, const char *, size_t count); }; -static void affinity_set(unsigned int cpu, cpumask_t *oldmask, - cpumask_t *newmask) -{ - *oldmask = current->cpus_allowed; - cpus_clear(*newmask); - cpu_set(cpu, *newmask); - set_cpus_allowed_ptr(current, newmask); -} - -static void affinity_restore(const cpumask_t *oldmask) -{ - set_cpus_allowed_ptr(current, oldmask); -} - #define SHOW_FIELDS(name) \ static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ { \ @@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask, newmask; + struct thresh_restart tr; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; b->interrupt_enable = !!new; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 0, 0); - affinity_restore(&oldmask); + tr.b = b; + tr.reset = 0; + tr.old_limit = 0; + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return end - buf; } @@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, const char *buf, size_t count) { char *end; - cpumask_t oldmask, newmask; - u16 old; + struct thresh_restart tr; unsigned long new = simple_strtoul(buf, &end, 0); if (end == buf) return -EINVAL; @@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b, new = THRESHOLD_MAX; if (new < 1) new = 1; - old = b->threshold_limit; + tr.old_limit = b->threshold_limit; b->threshold_limit = new; + tr.b = b; + tr.reset = 0; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 0, old); - affinity_restore(&oldmask); + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return end - buf; } -static ssize_t show_error_count(struct threshold_block *b, char *buf) +static long local_error_count(void *_b) { - u32 high, low; - cpumask_t oldmask, newmask; - affinity_set(b->cpu, &oldmask, &newmask); + struct threshold_block *b = _b; + u32 low, high; + rdmsr(b->address, low, high); - affinity_restore(&oldmask); - return sprintf(buf, "%x\n", - (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); + return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); +} + +static ssize_t show_error_count(struct threshold_block *b, char *buf) +{ + return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); } static ssize_t store_error_count(struct threshold_block *b, const char *buf, size_t count) { - cpumask_t oldmask, newmask; - affinity_set(b->cpu, &oldmask, &newmask); - threshold_restart_bank(b, 1, 0); - affinity_restore(&oldmask); + struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; + + work_on_cpu(b->cpu, threshold_restart_bank, &tr); return 1; } @@ -463,12 +462,19 @@ out_free: return err; } +static long local_allocate_threshold_blocks(void *_bank) +{ + unsigned int *bank = _bank; + + return allocate_threshold_blocks(smp_processor_id(), *bank, 0, + MSR_IA32_MC0_MISC + *bank * 4); +} + /* symlinks sibling shared banks to first core. first core owns dir/files. */ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) { int i, err = 0; struct threshold_bank *b = NULL; - cpumask_t oldmask, newmask; char name[32]; sprintf(name, "threshold_bank%i", bank); @@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) per_cpu(threshold_banks, cpu)[bank] = b; - affinity_set(cpu, &oldmask, &newmask); - err = allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); - affinity_restore(&oldmask); - + err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); if (err) goto out_free; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01eeb..b59ddcc88cd8 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -14,14 +14,6 @@ #include <asm/pat.h> #include "mtrr.h" -struct mtrr_state { - struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; - mtrr_type fixed_ranges[NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - struct fixed_range_block { int base_msr; /* start address of an MTRR block */ int ranges; /* number of MTRRs in this block */ @@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { }; static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; static int mtrr_state_set; u64 mtrr_tom2; +struct mtrr_state_type mtrr_state = {}; +EXPORT_SYMBOL_GPL(mtrr_state); + #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 1159e269e596..d259e5d2e054 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -49,7 +49,7 @@ u32 num_var_ranges = 0; -unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; static DEFINE_MUTEX(mtrr_mutex); u64 size_or_mask, size_and_mask; @@ -574,7 +574,7 @@ struct mtrr_value { unsigned long lsize; }; -static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; +static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; static int mtrr_save(struct sys_device * sysdev, pm_message_t state) { @@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata = static int __init disable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; + enable_mtrr_cleanup = 0; return 0; } early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); static int __init enable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; + enable_mtrr_cleanup = 1; return 0; } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b23..ffd60409cc6d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -8,11 +8,6 @@ #define MTRRcap_MSR 0x0fe #define MTRRdefType_MSR 0x2ff -#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) -#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) - -#define NUM_FIXED_RANGES 88 -#define MAX_VAR_RANGES 256 #define MTRRfix64K_00000_MSR 0x250 #define MTRRfix16K_80000_MSR 0x258 #define MTRRfix16K_A0000_MSR 0x259 @@ -29,11 +24,7 @@ #define MTRR_CHANGE_MASK_VARIABLE 0x02 #define MTRR_CHANGE_MASK_DEFTYPE 0x04 -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; +extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; struct mtrr_ops { u32 vendor; @@ -70,13 +61,6 @@ struct set_mtrr_context { u32 ccr3; }; -struct mtrr_var_range { - u32 base_lo; - u32 base_hi; - u32 mask_lo; - u32 mask_hi; -}; - void set_mtrr_done(struct set_mtrr_context *ctxt); void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649b..85d28d53f5d3 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -39,10 +39,10 @@ #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> +#include <linux/uaccess.h> #include <asm/processor.h> #include <asm/msr.h> -#include <asm/uaccess.h> #include <asm/system.h> static struct class *cpuid_class; @@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) } static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t * ppos) + size_t count, loff_t *ppos) { char __user *tmp = buf; struct cpuid_regs cmd; @@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; int ret = 0; - + lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index d84a852e4cd7..c689d19e35ab 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -26,6 +26,7 @@ #include <linux/kdebug.h> #include <asm/smp.h> #include <asm/reboot.h> +#include <asm/virtext.h> #include <mach_ipi.h> @@ -49,6 +50,15 @@ static void kdump_nmi_callback(int cpu, struct die_args *args) #endif crash_save_cpu(regs, cpu); + /* Disable VMX or SVM if needed. + * + * We need to disable virtualization on all CPUs. + * Having VMX or SVM enabled on any CPU may break rebooting + * after the kdump kernel has finished its task. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + disable_local_APIC(); } @@ -80,6 +90,14 @@ void native_machine_crash_shutdown(struct pt_regs *regs) local_irq_disable(); kdump_nmi_shootdown_cpus(); + + /* Booting kdump kernel with VMX or SVM enabled won't work, + * because (among other limitations) we can't disable paging + * with the virt flags. + */ + cpu_emergency_vmxoff(); + cpu_emergency_svm_disable(); + lapic_shutdown(); #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 23b138e31e9c..504ad198e4ad 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...) va_list ap; va_start(ap, fmt); - n = vscnprintf(buf, 512, fmt, ap); + n = vscnprintf(buf, sizeof(buf), fmt, ap); early_console->write(early_console, buf, n); va_end(ap); } diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda4..34185488e4fb 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 1; } -static cpumask_t flat_target_cpus(void) +static const struct cpumask *flat_target_cpus(void) { - return cpu_online_map; + return cpu_online_mask; } -static cpumask_t flat_vector_allocation_domain(int cpu) +static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + cpumask_clear(retmask); + cpumask_bits(retmask)[0] = APIC_ALL_CPUS; } /* @@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void) apic_write(APIC_LDR, val); } -static void flat_send_IPI_mask(cpumask_t cpumask, int vector) +static inline void _flat_send_IPI_mask(unsigned long mask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; unsigned long flags; local_irq_save(flags); @@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) local_irq_restore(flags); } +static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + + _flat_send_IPI_mask(mask, vector); +} + +static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, + int vector) +{ + unsigned long mask = cpumask_bits(cpumask)[0]; + int cpu = smp_processor_id(); + + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); + _flat_send_IPI_mask(mask, vector); +} + static void flat_send_IPI_allbutself(int vector) { + int cpu = smp_processor_id(); #ifdef CONFIG_HOTPLUG_CPU int hotplug = 1; #else int hotplug = 0; #endif if (hotplug || vector == NMI_VECTOR) { - cpumask_t allbutme = cpu_online_map; + if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { + unsigned long mask = cpumask_bits(cpu_online_mask)[0]; - cpu_clear(smp_processor_id(), allbutme); + if (cpu < BITS_PER_LONG) + clear_bit(cpu, &mask); - if (!cpus_empty(allbutme)) - flat_send_IPI_mask(allbutme, vector); + _flat_send_IPI_mask(mask, vector); + } } else if (num_online_cpus() > 1) { __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); } @@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector) static void flat_send_IPI_all(int vector) { if (vector == NMI_VECTOR) - flat_send_IPI_mask(cpu_online_map, vector); + flat_send_IPI_mask(cpu_online_mask, vector); else __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); } @@ -135,9 +155,18 @@ static int flat_apic_id_registered(void) return physid_isset(read_xapic_id(), phys_cpu_present_map); } -static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) +{ + return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +} + +static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) { - return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; + unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; + + return mask1 & mask2; } static unsigned int phys_pkg_id(int index_msb) @@ -157,8 +186,10 @@ struct genapic apic_flat = { .send_IPI_all = flat_send_IPI_all, .send_IPI_allbutself = flat_send_IPI_allbutself, .send_IPI_mask = flat_send_IPI_mask, + .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, @@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t physflat_target_cpus(void) +static const struct cpumask *physflat_target_cpus(void) { - return cpu_online_map; + return cpu_online_mask; } -static cpumask_t physflat_vector_allocation_domain(int cpu) +static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) { - return cpumask_of_cpu(cpu); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } -static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) +static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) { send_IPI_mask_sequence(cpumask, vector); } -static void physflat_send_IPI_allbutself(int vector) +static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, + int vector) { - cpumask_t allbutme = cpu_online_map; + send_IPI_mask_allbutself(cpumask, vector); +} - cpu_clear(smp_processor_id(), allbutme); - physflat_send_IPI_mask(allbutme, vector); +static void physflat_send_IPI_allbutself(int vector) +{ + send_IPI_mask_allbutself(cpu_online_mask, vector); } static void physflat_send_IPI_all(int vector) { - physflat_send_IPI_mask(cpu_online_map, vector); + physflat_send_IPI_mask(cpu_online_mask, vector); } -static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int +physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + struct genapic apic_physflat = { .name = "physical flat", .acpi_madt_oem_check = physflat_acpi_madt_oem_check, @@ -243,8 +296,10 @@ struct genapic apic_physflat = { .send_IPI_all = physflat_send_IPI_all, .send_IPI_allbutself = physflat_send_IPI_allbutself, .send_IPI_mask = physflat_send_IPI_mask, + .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, .send_IPI_self = apic_send_IPI_self, .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a6..6ce497cc372d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c @@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } /* * for now each logical cpu is in its own vector allocation domain. */ -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, * at once. We have 16 cpu's in a cluster. This will minimize IPI register * writes. */ -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { - __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), - vector, APIC_DEST_LOGICAL); - } + for_each_cpu(query_cpu, mask) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - cpu_clear(smp_processor_id(), mask); + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); +} + +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_logical_apicid, query_cpu), + vector, APIC_DEST_LOGICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; /* - * We're using fixed IRQ delivery, can only return one phys APIC ID. + * We're using fixed IRQ delivery, can only return one logical APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_logical_apicid, cpu); else return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one logical APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_logical_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b7..21bcc0e098ba 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t x2apic_target_cpus(void) +static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } -static cpumask_t x2apic_vector_allocation_domain(int cpu) +static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, @@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, x2apic_icr_write(cfg, apicid); } -static void x2apic_send_IPI_mask(cpumask_t mask, int vector) +static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned long flags; unsigned long query_cpu; local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu(query_cpu, mask) { __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } local_irq_restore(flags); } -static void x2apic_send_IPI_allbutself(int vector) +static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, + int vector) { - cpumask_t mask = cpu_online_map; + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) { + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + } + local_irq_restore(flags); +} - cpu_clear(smp_processor_id(), mask); +static void x2apic_send_IPI_allbutself(int vector) +{ + unsigned long flags; + unsigned long query_cpu; + unsigned long this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - x2apic_send_IPI_mask(mask, vector); + local_irq_save(flags); + for_each_online_cpu(query_cpu) + if (query_cpu != this_cpu) + __x2apic_send_IPI_dest( + per_cpu(x86_cpu_to_apicid, query_cpu), + vector, APIC_DEST_PHYSICAL); + local_irq_restore(flags); } static void x2apic_send_IPI_all(int vector) { - x2apic_send_IPI_mask(cpu_online_map, vector); + x2apic_send_IPI_mask(cpu_online_mask, vector); } static int x2apic_apic_id_registered(void) @@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void) return 1; } -static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); - if ((unsigned)cpu < NR_CPUS) + cpu = cpumask_first(cpumask); + if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb) return current_cpu_data.initial_apicid >> index_msb; } -void x2apic_send_IPI_self(int vector) +static void x2apic_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -void init_x2apic_ldr(void) +static void init_x2apic_ldr(void) { return; } @@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = { .send_IPI_all = x2apic_send_IPI_all, .send_IPI_allbutself = x2apic_send_IPI_allbutself, .send_IPI_mask = x2apic_send_IPI_mask, + .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, .send_IPI_self = x2apic_send_IPI_self, .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index dece17289731..b193e082f6ce 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -79,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ -static cpumask_t uv_target_cpus(void) +static const struct cpumask *uv_target_cpus(void) { - return cpumask_of_cpu(0); + return cpumask_of(0); } -static cpumask_t uv_vector_allocation_domain(int cpu) +static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) { - cpumask_t domain = CPU_MASK_NONE; - cpu_set(cpu, domain); - return domain; + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); } int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) @@ -127,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector) uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } -static void uv_send_IPI_mask(cpumask_t mask, int vector) +static void uv_send_IPI_mask(const struct cpumask *mask, int vector) { unsigned int cpu; - for_each_possible_cpu(cpu) - if (cpu_isset(cpu, mask)) + for_each_cpu(cpu, mask) + uv_send_IPI_one(cpu, vector); +} + +static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); + + for_each_cpu(cpu, mask) + if (cpu != this_cpu) uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_allbutself(int vector) { - cpumask_t mask = cpu_online_map; - - cpu_clear(smp_processor_id(), mask); + unsigned int cpu; + unsigned int this_cpu = smp_processor_id(); - if (!cpus_empty(mask)) - uv_send_IPI_mask(mask, vector); + for_each_online_cpu(cpu) + if (cpu != this_cpu) + uv_send_IPI_one(cpu, vector); } static void uv_send_IPI_all(int vector) { - uv_send_IPI_mask(cpu_online_map, vector); + uv_send_IPI_mask(cpu_online_mask, vector); } static int uv_apic_id_registered(void) @@ -160,7 +168,7 @@ static void uv_init_apic_ldr(void) { } -static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) +static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) { int cpu; @@ -168,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) * We're using fixed IRQ delivery, can only return one phys APIC ID. * May as well be the first. */ - cpu = first_cpu(cpumask); + cpu = cpumask_first(cpumask); if ((unsigned)cpu < nr_cpu_ids) return per_cpu(x86_cpu_to_apicid, cpu); else return BAD_APICID; } +static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, + const struct cpumask *andmask) +{ + int cpu; + + /* + * We're using fixed IRQ delivery, can only return one phys APIC ID. + * May as well be the first. + */ + for_each_cpu_and(cpu, cpumask, andmask) + if (cpumask_test_cpu(cpu, cpu_online_mask)) + break; + if (cpu < nr_cpu_ids) + return per_cpu(x86_cpu_to_apicid, cpu); + return BAD_APICID; +} + static unsigned int get_apic_id(unsigned long x) { unsigned int id; @@ -222,8 +247,10 @@ struct genapic apic_x2apic_uv_x = { .send_IPI_all = uv_send_IPI_all, .send_IPI_allbutself = uv_send_IPI_allbutself, .send_IPI_mask = uv_send_IPI_mask, + .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, .send_IPI_self = uv_send_IPI_self, .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, .phys_pkg_id = phys_pkg_id, .get_apic_id = get_apic_id, .set_apic_id = set_apic_id, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 388e05a5fc17..b9a4d8c4b935 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,7 +27,7 @@ #include <asm/trampoline.h> /* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda __read_mostly; +static struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 845ea097383e..cd759ad90690 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -248,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) * Start hpet with the boot cpu mask and make it * global after the IO_APIC has been initialized. */ - hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(&hpet_clockevent); global_clock_event = &hpet_clockevent; printk(KERN_DEBUG "hpet clockevent registered\n"); @@ -303,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); hpet_setup_msi_irq(hdev->irq); disable_irq(hdev->irq); - irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } break; @@ -451,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) return -1; disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); enable_irq(dev->irq); printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", @@ -502,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) /* 5 usec minimum reprogramming delta. */ evt->min_delta_ns = 5000; - evt->cpumask = cpumask_of_cpu(hdev->cpu); + evt->cpumask = cpumask_of(hdev->cpu); clockevents_register_device(evt); } diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c1b5e3ece1f2..10f92fb532f3 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -114,7 +114,7 @@ void __init setup_pit_timer(void) * Start pit with the boot cpu mask and make it global after the * IO_APIC has been initialized. */ - pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); + pit_clockevent.cpumask = cpumask_of(smp_processor_id()); pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_clockevent.shift); pit_clockevent.max_delta_ns = diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index d39918076bb4..df3bf269beab 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -10,7 +10,6 @@ #include <asm/pgtable.h> #include <asm/desc.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f6ea94b74da1..69911722b9d3 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -136,8 +136,8 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) struct irq_cfg { struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; + cpumask_var_t domain; + cpumask_var_t old_domain; unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; @@ -152,25 +152,25 @@ static struct irq_cfg irq_cfgx[] = { #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + [0] = { .vector = IRQ0_VECTOR, }, + [1] = { .vector = IRQ1_VECTOR, }, + [2] = { .vector = IRQ2_VECTOR, }, + [3] = { .vector = IRQ3_VECTOR, }, + [4] = { .vector = IRQ4_VECTOR, }, + [5] = { .vector = IRQ5_VECTOR, }, + [6] = { .vector = IRQ6_VECTOR, }, + [7] = { .vector = IRQ7_VECTOR, }, + [8] = { .vector = IRQ8_VECTOR, }, + [9] = { .vector = IRQ9_VECTOR, }, + [10] = { .vector = IRQ10_VECTOR, }, + [11] = { .vector = IRQ11_VECTOR, }, + [12] = { .vector = IRQ12_VECTOR, }, + [13] = { .vector = IRQ13_VECTOR, }, + [14] = { .vector = IRQ14_VECTOR, }, + [15] = { .vector = IRQ15_VECTOR, }, }; -void __init arch_early_irq_init(void) +int __init arch_early_irq_init(void) { struct irq_cfg *cfg; struct irq_desc *desc; @@ -183,7 +183,13 @@ void __init arch_early_irq_init(void) for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; + alloc_bootmem_cpumask_var(&cfg[i].domain); + alloc_bootmem_cpumask_var(&cfg[i].old_domain); + if (i < NR_IRQS_LEGACY) + cpumask_setall(cfg[i].domain); } + + return 0; } #ifdef CONFIG_SPARSE_IRQ @@ -207,12 +213,26 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + if (cfg) { + /* FIXME: needs alloc_cpumask_var_node() */ + if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { + kfree(cfg); + cfg = NULL; + } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { + free_cpumask_var(cfg->domain); + kfree(cfg); + cfg = NULL; + } else { + cpumask_clear(cfg->domain); + cpumask_clear(cfg->old_domain); + } + } printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); return cfg; } -void arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int cpu) { struct irq_cfg *cfg; @@ -224,6 +244,8 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu) BUG_ON(1); } } + + return 0; } #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC @@ -329,13 +351,14 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) } } -static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +static void +set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg = desc->chip_data; if (!cfg->move_in_progress) { /* it means that domain is not changed */ - if (!cpus_intersects(desc->affinity, mask)) + if (!cpumask_intersects(&desc->affinity, mask)) cfg->move_desc_pending = 1; } } @@ -350,7 +373,8 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +static inline void +set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) { } #endif @@ -481,6 +505,26 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; @@ -516,41 +560,55 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); -static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) +/* + * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid + * of that, or returns BAD_APICID and leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; unsigned int irq; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; irq = desc->irq; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; + return BAD_APICID; + cpumask_and(&desc->affinity, cfg->domain, mask); set_extra_move_desc(desc, mask); + return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); +} - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); +static void +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg); - desc->affinity = mask; + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + } spin_unlock_irqrestore(&ioapic_lock, flags); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc; @@ -648,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) } #ifdef CONFIG_X86_64 -void io_apic_sync(struct irq_pin_list *entry) +static void io_apic_sync(struct irq_pin_list *entry) { /* * Synchronize the IO-APIC and the CPU by doing @@ -1218,7 +1276,8 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1233,49 +1292,49 @@ static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; - int cpu; + int cpu, err; + cpumask_var_t tmp_mask; if ((cfg->move_in_progress) || cfg->move_cleanup_count) return -EBUSY; - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); + if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) + return -ENOMEM; old_vector = cfg->vector; if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) + cpumask_and(tmp_mask, mask, cpu_online_mask); + cpumask_and(tmp_mask, cfg->domain, tmp_mask); + if (!cpumask_empty(tmp_mask)) { + free_cpumask_var(tmp_mask); return 0; + } } - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; + /* Only try and allocate irqs on cpus that are present */ + err = -ENOSPC; + for_each_cpu_and(cpu, mask, cpu_online_mask) { int new_cpu; int vector, offset; - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); + vector_allocation_domain(cpu, tmp_mask); vector = current_vector; offset = current_offset; next: vector += 8; if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ + /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; vector = FIRST_DEVICE_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) + + if (test_bit(vector, used_vectors)) goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) + + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ @@ -1283,18 +1342,21 @@ next: current_offset = offset; if (old_vector) { cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; + cpumask_copy(cfg->old_domain, cfg->domain); } - for_each_cpu_mask_nr(new_cpu, new_mask) + for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; cfg->vector = vector; - cfg->domain = domain; - return 0; + cpumask_copy(cfg->domain, tmp_mask); + err = 0; + break; } - return -ENOSPC; + free_cpumask_var(tmp_mask); + return err; } -static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) +static int +assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -1307,23 +1369,20 @@ static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - cpumask_t mask; int cpu, vector; BUG_ON(!cfg->vector); vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) + for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; - cpus_clear(cfg->domain); + cpumask_clear(cfg->domain); if (likely(!cfg->move_in_progress)) return; - cpus_and(mask, cfg->old_domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) @@ -1345,10 +1404,8 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { - if (!desc) - continue; cfg = desc->chip_data; - if (!cpu_isset(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; @@ -1360,7 +1417,7 @@ void __setup_vector_irq(int cpu) continue; cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1496,18 +1553,17 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - cpumask_t mask; + unsigned int dest; if (!IO_APIC_IRQ(irq)) return; cfg = desc->chip_data; - mask = TARGET_CPUS; - if (assign_irq_vector(irq, cfg, mask)) + if (assign_irq_vector(irq, cfg, TARGET_CPUS)) return; - cpus_and(mask, cfg->domain, mask); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1517,8 +1573,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { + dest, trigger, polarity, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); __clear_irq_vector(irq, cfg); @@ -1730,8 +1785,6 @@ __apicdebuginit(void) print_IO_APIC(void) for_each_irq_desc(irq, desc) { struct irq_pin_list *entry; - if (!desc) - continue; cfg = desc->chip_data; entry = cfg->irq_2_pin; if (!entry) @@ -2240,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq) unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); spin_unlock_irqrestore(&vector_lock, flags); return 1; @@ -2289,18 +2342,17 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) +static void +migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; - cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; unsigned int irq; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + if (!cpumask_intersects(mask, cpu_online_mask)) return; irq = desc->irq; @@ -2313,8 +2365,7 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) set_extra_move_desc(desc, mask); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, mask); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { @@ -2331,14 +2382,10 @@ static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) */ modify_irte(irq, &irte); - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } + if (cfg->move_in_progress) + send_cleanup_vector(cfg); - desc->affinity = mask; + cpumask_copy(&desc->affinity, mask); } static int migrate_irq_remapped_level_desc(struct irq_desc *desc) @@ -2360,11 +2407,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq_desc(desc, desc->pending_mask); + migrate_ioapic_irq_desc(desc, &desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); + cpumask_clear(&desc->pending_mask); unmask: unmask_IO_APIC_irq_desc(desc); @@ -2378,9 +2425,6 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { - if (!desc) - continue; - if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2392,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, desc->pending_mask); + desc->chip->set_affinity(irq, &desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -2401,18 +2445,20 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, + const struct cpumask *mask) { if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; + cpumask_copy(&desc->pending_mask, mask); migrate_irq_remapped_level_desc(desc); return; } migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ir_ioapic_affinity_irq(unsigned int irq, + const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -2447,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (!cfg->move_cleanup_count) goto unlock; - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; __get_cpu_var(vector_irq)[vector] = -1; @@ -2484,20 +2530,14 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) + send_cleanup_vector(cfg); } #else static inline void irq_complete_move(struct irq_desc **descp) {} @@ -2670,9 +2710,6 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for_each_irq_desc(irq, desc) { - if (!desc) - continue; - cfg = desc->chip_data; if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* @@ -3222,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms struct irq_cfg *cfg; int err; unsigned dest; - cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, tmp); + err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (err) return err; - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { @@ -3285,26 +3319,18 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); read_msi_msg_desc(desc, &msg); @@ -3314,37 +3340,27 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); - desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +static void +ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg; + struct irq_cfg *cfg = desc->chip_data; unsigned int dest; - cpumask_t tmp, cleanup_mask; struct irte irte; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - if (get_irte(irq, &irte)) return; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3358,14 +3374,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) * at the new destination. So, time to cleanup the previous * vector allocation. */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; + if (cfg->move_in_progress) + send_cleanup_vector(cfg); } #endif @@ -3556,26 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); dmar_msi_read(irq, &msg); @@ -3585,7 +3587,6 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3619,26 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); hpet_msi_read(irq, &msg); @@ -3648,7 +3641,6 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -3703,28 +3695,19 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; - cpumask_t tmp; - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) return; cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return; - - set_extra_move_desc(desc, mask); - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc->affinity = mask; } #endif @@ -3744,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { struct irq_cfg *cfg; int err; - cpumask_t tmp; cfg = irq_cfg(irq); - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, cfg, tmp); + err = assign_irq_vector(irq, cfg, TARGET_CPUS); if (!err) { struct ht_irq_msg msg; unsigned dest; - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); + dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -3790,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset) { - const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + const struct cpumask *eligible_cpu = cpumask_of(cpu); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3800,7 +3780,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, *eligible_cpu); + err = assign_irq_vector(irq, cfg, eligible_cpu); if (err != 0) return err; @@ -3819,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, entry->polarity = 0; entry->trigger = 0; entry->mask = 0; - entry->dest = cpu_mask_to_apicid(*eligible_cpu); + entry->dest = cpu_mask_to_apicid(eligible_cpu); mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); @@ -4030,7 +4010,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; struct irq_desc *desc; struct irq_cfg *cfg; - cpumask_t mask; + const struct cpumask *mask; if (skip_ioapic_setup == 1) return; @@ -4061,7 +4041,7 @@ void __init setup_ioapic_dest(void) */ if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->affinity; + mask = &desc->affinity; else mask = TARGET_CPUS; diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f35..285bbf8831fa 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) /* * This is only used on smaller machines. */ -void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) +void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) { - unsigned long mask = cpus_addr(cpumask)[0]; + unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; local_irq_save(flags); - WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); + WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __send_IPI_dest_field(mask, vector); local_irq_restore(flags); } -void send_IPI_mask_sequence(cpumask_t mask, int vector) +void send_IPI_mask_sequence(const struct cpumask *mask, int vector) { unsigned long flags; unsigned int query_cpu; @@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) */ local_irq_save(flags); - for_each_possible_cpu(query_cpu) { - if (cpu_isset(query_cpu, mask)) { + for_each_cpu(query_cpu, mask) + __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); + local_irq_restore(flags); +} + +void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned long flags; + unsigned int query_cpu; + unsigned int this_cpu = smp_processor_id(); + + /* See Hack comment above */ + + local_irq_save(flags); + for_each_cpu(query_cpu, mask) + if (query_cpu != this_cpu) __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); - } - } local_irq_restore(flags); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3f1d9d18df67..bce53e1352a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -9,6 +9,7 @@ #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/smp.h> +#include <asm/irq.h> atomic_t irq_err_count; @@ -190,3 +191,5 @@ u64 arch_irq_stat(void) #endif return sum; } + +EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 119fc9c8ff7f..9dc5588f336a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -233,27 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU #include <mach_apic.h> -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; + const struct cpumask *affinity; if (!desc) continue; if (irq == 2) continue; - cpus_and(mask, desc->affinity, map); - if (any_online_cpu(mask) == NR_CPUS) { + affinity = &desc->affinity; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { printk("Breaking affinity for irq %i\n", irq); - mask = map; + affinity = cpu_all_mask; } if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, affinity); else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a174a217eb1a..6383d50f82ea 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -80,16 +80,17 @@ asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) } #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(cpumask_t map) +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) { unsigned int irq; static int warned; struct irq_desc *desc; for_each_irq_desc(irq, desc) { - cpumask_t mask; int break_affinity = 0; int set_affinity = 1; + const struct cpumask *affinity; if (!desc) continue; @@ -99,23 +100,23 @@ void fixup_irqs(cpumask_t map) /* interrupt's are disabled at this point */ spin_lock(&desc->lock); + affinity = &desc->affinity; if (!irq_has_action(irq) || - cpus_equal(desc->affinity, map)) { + cpumask_equal(affinity, cpu_online_mask)) { spin_unlock(&desc->lock); continue; } - cpus_and(mask, desc->affinity, map); - if (cpus_empty(mask)) { + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; - mask = map; + affinity = cpu_all_mask; } if (desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, mask); + desc->chip->set_affinity(irq, affinity); else if (!(warned++)) set_affinity = 0; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 203384ed2b5d..84723295f88a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -110,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -146,10 +158,12 @@ void __init native_init_IRQ(void) alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); /* IPI for single call function */ - set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 6190e6ef546c..31ebfe38e96c 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -69,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 }; +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + void __init init_ISA_irqs(void) { int i; @@ -121,6 +133,7 @@ static void __init smp_intr_init(void) /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e169ae9b6a62..652fce6d2cce 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) */ static unsigned long kvm_get_tsc_khz(void) { - return preset_lpj; + struct pvclock_vcpu_time_info *src; + src = &per_cpu(hv_clock, 0); + return pvclock_tsc_khz(src); } static void kvm_get_preset_lpj(void) { - struct pvclock_vcpu_time_info *src; unsigned long khz; u64 lpj; - src = &per_cpu(hv_clock, 0); - khz = pvclock_tsc_khz(src); + khz = kvm_get_tsc_khz(); lpj = ((u64)khz * 1000); do_div(lpj, HZ); @@ -194,5 +194,7 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register(&kvm_clock); + pv_info.paravirt_enabled = 1; + pv_info.name = "KVM"; } } diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index eee32b43fee3..71f1d99a635d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,8 +12,8 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/vmalloc.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/system.h> #include <asm/ldt.h> #include <asm/desc.h> @@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) if (err < 0) return err; - for(i = 0; i < old->size; i++) + for (i = 0; i < old->size; i++) write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3b599518c322..c12314c9e86f 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { .set_mode = mfgpt_set_mode, .set_next_event = mfgpt_next_event, .rating = 250, - .cpumask = CPU_MASK_ALL, + .cpumask = cpu_all_mask, .shift = 32 }; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe85..666e43df51f9 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,8 +13,7 @@ #include <asm/msr.h> #include <asm/acpi.h> #include <asm/mmconfig.h> - -#include "../pci/pci.h" +#include <asm/pci_x86.h> struct pci_hostbridge_probe { u32 bus; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 45e3b69808ba..c5c5b8df1dbc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -16,14 +16,14 @@ #include <linux/bitops.h> #include <linux/acpi.h> #include <linux/module.h> +#include <linux/smp.h> +#include <linux/acpi.h> -#include <asm/smp.h> #include <asm/mtrr.h> #include <asm/mpspec.h> #include <asm/pgalloc.h> #include <asm/io_apic.h> #include <asm/proto.h> -#include <asm/acpi.h> #include <asm/bios_ebda.h> #include <asm/e820.h> #include <asm/trampoline.h> @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { @@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) x86_quirks->mpc_oem_pci_bus(m); clear_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8bd1bf9622a7..45a09ccdc214 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -26,11 +26,10 @@ #include <linux/kernel_stat.h> #include <linux/kdebug.h> #include <linux/smp.h> +#include <linux/nmi.h> #include <asm/i8259.h> #include <asm/io_apic.h> -#include <asm/smp.h> -#include <asm/nmi.h> #include <asm/proto.h> #include <asm/timer.h> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a35eaa379ff6..00c2bcd41463 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * to trigger bugs with some popular PCI cards, in particular 3ware (but * has been also also seen with Qlogic at least). */ -int iommu_fullflush = 1; +static int iommu_fullflush = 1; /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 61f718df6eec..bf088c61fa40 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,8 @@ #include <asm/proto.h> #include <asm/reboot_fixups.h> #include <asm/reboot.h> +#include <asm/pci_x86.h> +#include <asm/virtext.h> #ifdef CONFIG_X86_32 # include <linux/dmi.h> @@ -23,7 +25,6 @@ #include <mach_ipi.h> - /* * Power off function, if any */ @@ -39,6 +40,12 @@ int reboot_force; static int reboot_cpu = -1; #endif +/* This is set if we need to go through the 'emergency' path. + * When machine_emergency_restart() is called, we may be on + * an inconsistent state and won't be able to do a clean cleanup + */ +static int reboot_emergency; + /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ bool port_cf9_safe = false; @@ -368,6 +375,48 @@ static inline void kb_wait(void) } } +static void vmxoff_nmi(int cpu, struct die_args *args) +{ + cpu_emergency_vmxoff(); +} + +/* Use NMIs as IPIs to tell all CPUs to disable virtualization + */ +static void emergency_vmx_disable_all(void) +{ + /* Just make sure we won't change CPUs while doing this */ + local_irq_disable(); + + /* We need to disable VMX on all CPUs before rebooting, otherwise + * we risk hanging up the machine, because the CPU ignore INIT + * signals when VMX is enabled. + * + * We can't take any locks and we may be on an inconsistent + * state, so we use NMIs as IPIs to tell the other CPUs to disable + * VMX and halt. + * + * For safety, we will avoid running the nmi_shootdown_cpus() + * stuff unnecessarily, but we don't have a way to check + * if other CPUs have VMX enabled. So we will call it only if the + * CPU we are running on has VMX enabled. + * + * We will miss cases where VMX is not enabled on all CPUs. This + * shouldn't do much harm because KVM always enable VMX on all + * CPUs anyway. But we can miss it on the small window where KVM + * is still enabling VMX. + */ + if (cpu_has_vmx() && cpu_vmx_enabled()) { + /* Disable VMX on this CPU. + */ + cpu_vmxoff(); + + /* Halt and disable VMX on the other CPUs */ + nmi_shootdown_cpus(vmxoff_nmi); + + } +} + + void __attribute__((weak)) mach_reboot_fixups(void) { } @@ -376,6 +425,9 @@ static void native_machine_emergency_restart(void) { int i; + if (reboot_emergency) + emergency_vmx_disable_all(); + /* Tell the BIOS if we want cold or warm reboot */ *((unsigned short *)__va(0x472)) = reboot_mode; @@ -482,13 +534,19 @@ void native_machine_shutdown(void) #endif } +static void __machine_emergency_restart(int emergency) +{ + reboot_emergency = emergency; + machine_ops.emergency_restart(); +} + static void native_machine_restart(char *__unused) { printk("machine restart\n"); if (!reboot_force) machine_shutdown(); - machine_emergency_restart(); + __machine_emergency_restart(0); } static void native_machine_halt(void) @@ -532,7 +590,7 @@ void machine_shutdown(void) void machine_emergency_restart(void) { - machine_ops.emergency_restart(); + __machine_emergency_restart(1); } void machine_restart(char *cmd) @@ -592,10 +650,7 @@ static int crash_nmi_callback(struct notifier_block *self, static void smp_send_nmi_allbutself(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(safe_smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_IPI_mask(mask, NMI_VECTOR); + send_IPI_allbutself(NMI_VECTOR); } static struct notifier_block crash_nmi_nb = { diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb770..0b63b08e7530 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -152,6 +152,11 @@ void __init setup_per_cpu_areas(void) old_size = PERCPU_ENOUGH_ROOM; align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); + + printk(KERN_INFO + "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", + NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); + printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -168,24 +173,24 @@ void __init setup_per_cpu_areas(void) "cpu %d has no node %d or node-local memory\n", cpu, node); if (ptr) - printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + printk(KERN_DEBUG + "per cpu data for cpu%d at %016lx\n", cpu, __pa(ptr)); } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); if (ptr) - printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", - cpu, node, __pa(ptr)); + printk(KERN_DEBUG + "per cpu data for cpu%d on node%d " + "at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); } - printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", - NR_CPUS, nr_cpu_ids, nr_node_ids); - /* Setup percpu data maps */ setup_per_cpu_maps(); @@ -282,7 +287,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) else cpu_clear(cpu, *mask); - cpulist_scnprintf(buf, sizeof(buf), *mask); + cpulist_scnprintf(buf, sizeof(buf), mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 7e558db362c1..beea2649a240 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -118,22 +118,22 @@ static void native_smp_send_reschedule(int cpu) WARN_ON(1); return; } - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); + send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); } void native_send_call_func_single_ipi(int cpu) { - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); + send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); } -void native_send_call_func_ipi(cpumask_t mask) +void native_send_call_func_ipi(const struct cpumask *mask) { cpumask_t allbutself; allbutself = cpu_online_map; cpu_clear(smp_processor_id(), allbutself); - if (cpus_equal(mask, allbutself) && + if (cpus_equal(*mask, allbutself) && cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f8500c969442..31869bf5fabd 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -102,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; -/* bitmap of online cpus */ -cpumask_t cpu_online_map __read_mostly; -EXPORT_SYMBOL(cpu_online_map); - cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; -cpumask_t cpu_possible_map; -EXPORT_SYMBOL(cpu_possible_map); /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); @@ -1260,6 +1254,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) check_nmi_watchdog(); } +static int __initdata setup_possible_cpus = -1; +static int __init _setup_possible_cpus(char *str) +{ + get_option(&str, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + + /* * cpu_possible_map should be static, it cannot change as cpu's * are onlined, or offlined. The reason is per-cpu data-structures @@ -1272,7 +1275,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) * * Three ways to find out the number of additional hotplug CPUs: * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with additional_cpus=NUM + * - The user can overwrite it with possible_cpus=NUM * - Otherwise don't reserve additional CPUs. * We do this because additional CPUs waste a lot of memory. * -AK @@ -1285,9 +1288,17 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; - possible = num_processors + disabled_cpus; - if (possible > NR_CPUS) - possible = NR_CPUS; + if (setup_possible_cpus == -1) + possible = num_processors + disabled_cpus; + else + possible = setup_possible_cpus; + + if (possible > CONFIG_NR_CPUS) { + printk(KERN_WARNING + "%d Processors exceeds NR_CPUS limit of %d\n", + possible, CONFIG_NR_CPUS); + possible = CONFIG_NR_CPUS; + } printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); @@ -1352,7 +1363,7 @@ void cpu_disable_common(void) lock_vector_lock(); remove_cpu_from_maps(cpu); unlock_vector_lock(); - fixup_irqs(cpu_online_map); + fixup_irqs(); } int native_cpu_disable(void) diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index 8da059f949be..ce5054642247 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -163,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 29887d7081a9..f8be6f1d2e48 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, * We have to send the IPI only to * CPUs affected. */ - send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); + send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); while (!cpus_empty(f->flush_cpumask)) cpu_relax(); diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 6a00e5faaa74..f885023167e0 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -582,7 +582,6 @@ static int __init uv_ptc_init(void) static struct bau_control * __init uv_table_bases_init(int blade, int node) { int i; - int *ip; struct bau_msg_status *msp; struct bau_control *bau_tabp; @@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) bau_cpubits_clear(&msp->seen_by, (int) uv_blade_nr_possible_cpus(blade)); - bau_tabp->watching = - kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); - BUG_ON(!bau_tabp->watching); - - for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) - *ip = 0; - uv_bau_table_bases[blade] = bau_tabp; return bau_tabp; @@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; - bcp->watching = bau_tablesp->watching; bcp->msg_statuses = bau_tablesp->msg_statuses; bcp->descriptor_base = adp; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 141907ab6e22..ce6650eb64e9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -72,9 +72,6 @@ #include "cpu/mcheck/mce.h" -DECLARE_BITMAP(used_vectors, NR_VECTORS); -EXPORT_SYMBOL_GPL(used_vectors); - asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -89,6 +86,9 @@ gate_desc idt_table[256] __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; #endif +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); + static int ignore_nmis; static inline void conditional_sti(struct pt_regs *regs) @@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = 8; - /* This is always a kernel trap and never fixable (and thus must - never return). */ + /* + * This is always a kernel trap and never fixable (and thus must + * never return). + */ for (;;) die(str, regs, error_code); } @@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_64 -/* Help handler running on IST stack to switch back to user stack - for scheduling or signal handling. The actual stack switch is done in - entry.S */ +/* + * Help handler running on IST stack to switch back to user stack + * for scheduling or signal handling. The actual stack switch is done in + * entry.S + */ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; @@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) /* Exception from user space */ else if (user_mode(eregs)) regs = task_pt_regs(current); - /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + /* + * Exception from kernel and interrupts are enabled. Move to + * kernel process stack. + */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -685,12 +691,7 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - err = swd & ~cwd & 0x3f; - -#ifdef CONFIG_X86_32 - if (!err) - return; -#endif + err = swd & ~cwd; if (err & 0x001) { /* Invalid op */ /* @@ -708,7 +709,11 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ + /* + * If we're using IRQ 13, or supposedly even some trap 16 + * implementations, it's possible we get a spurious trap... + */ + return; /* Spurious trap, no error */ } force_sig_info(SIGFPE, &info, task); } @@ -941,9 +946,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) void __init trap_init(void) { -#ifdef CONFIG_X86_32 int i; -#endif #ifdef CONFIG_EISA void __iomem *p = early_ioremap(0x0FFFD9, 4); @@ -1000,11 +1003,15 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); +#endif /* Reserve all the builtin and the syscall vector: */ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); +#ifdef CONFIG_X86_64 + set_bit(IA32_SYSCALL_VECTOR, used_vectors); +#else set_bit(SYSCALL_VECTOR, used_vectors); #endif /* diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 254ee07f8635..c4c1f9e09402 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) /* Upper bound is clockevent's use of ulong for cycle deltas. */ evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); evt->min_delta_ns = clockevent_delta2ns(1, evt); - evt->cpumask = cpumask_of_cpu(cpu); + evt->cpumask = cpumask_of(cpu); printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", evt->name, evt->mult, evt->shift); diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 15c3e6999182..2b54fe002e94 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) * Restore the extended state if present. Otherwise, restore the FP/SSE * state. */ -int restore_user_xstate(void __user *buf) +static int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; u64 mask; diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 59ebd37ad79e..e665d1c623ca 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { + struct kvm_vcpu *vcpu; + int i; + mutex_lock(&kvm->lock); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); + + /* + * Provides NMI watchdog support via Virtual Wire mode. + * The route is: PIT -> PIC -> LVT0 in NMI mode. + * + * Note: Our Virtual Wire implementation is simplified, only + * propagating PIT interrupts to all VCPUs when they have set + * LVT0 to NMI delivery. Other PIC interrupts are just sent to + * VCPU0, and only if its LVT0 is in EXTINT mode. + */ + if (kvm->arch.vapics_in_nmi_mode > 0) + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (vcpu) + kvm_apic_nmi_wd_deliver(vcpu); + } } void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 17e41e165f1a..179dcb0103fd 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -26,10 +26,40 @@ * Port from Qemu. */ #include <linux/mm.h> +#include <linux/bitops.h> #include "irq.h" #include <linux/kvm_host.h> +static void pic_lock(struct kvm_pic *s) +{ + spin_lock(&s->lock); +} + +static void pic_unlock(struct kvm_pic *s) +{ + struct kvm *kvm = s->kvm; + unsigned acks = s->pending_acks; + bool wakeup = s->wakeup_needed; + struct kvm_vcpu *vcpu; + + s->pending_acks = 0; + s->wakeup_needed = false; + + spin_unlock(&s->lock); + + while (acks) { + kvm_notify_acked_irq(kvm, __ffs(acks)); + acks &= acks - 1; + } + + if (wakeup) { + vcpu = s->kvm->vcpus[0]; + if (vcpu) + kvm_vcpu_kick(vcpu); + } +} + static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { + pic_lock(s); pic_update_irq(s); + pic_unlock(s); } void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; + pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); } + pic_unlock(s); } /* @@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); + pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); + pic_unlock(s); kvm_notify_acked_irq(kvm, irq); return intno; @@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm) void kvm_pic_reset(struct kvm_kpic_state *s) { - int irq, irqbase; + int irq, irqbase, n; struct kvm *kvm = s->pics_state->irq_request_opaque; struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; @@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s) for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) - if (s->irr & (1 << irq) || s->isr & (1 << irq)) - kvm_notify_acked_irq(kvm, irq+irqbase); + if (s->irr & (1 << irq) || s->isr & (1 << irq)) { + n = irq + irqbase; + s->pics_state->pending_acks |= 1 << n; + } } s->last_irr = 0; s->irr = 0; @@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return; } + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } + pic_unlock(s); } static void picdev_read(struct kvm_io_device *this, @@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return; } + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; + pic_unlock(s); } /* @@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - kvm_vcpu_kick(vcpu); + s->wakeup_needed = true; } } @@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) return NULL; + spin_lock_init(&s->lock); + s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; s->pics[1].elcr_mask = 0xde; s->irq_request = pic_irq_request; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f17c8f5bbf31..2bf32a03ceec 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -25,6 +25,7 @@ #include <linux/mm_types.h> #include <linux/hrtimer.h> #include <linux/kvm_host.h> +#include <linux/spinlock.h> #include "iodev.h" #include "ioapic.h" @@ -59,6 +60,10 @@ struct kvm_kpic_state { }; struct kvm_pic { + spinlock_t lock; + bool wakeup_needed; + unsigned pending_acks; + struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ irq_request_func *irq_request; void *irq_request_opaque; @@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); void __kvm_migrate_timers(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 65ef0fc2c036..8e5ee99551f6 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h @@ -7,7 +7,7 @@ #include <linux/kvm_host.h> #include <asm/msr.h> -#include "svm.h" +#include <asm/svm.h> static const u32 host_save_user_msrs[] = { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fc3cab48943..afac68c0815c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; } +static inline int apic_lvt_nmi_mode(u32 lvt_val) +{ + return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; +} + static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ @@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, case APIC_DM_NMI: kvm_inject_nmi(vcpu); + kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: @@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } break; + case APIC_DM_EXTINT: + /* + * Should only be called by kvm_apic_local_deliver() with LVT0, + * before NMI watchdog was enabled. Already handled by + * kvm_apic_accept_pic_intr(). + */ + break; + default: printk(KERN_ERR "TODO: unsupported delivery mode %x\n", delivery_mode); @@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->timer.period))); } +static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) +{ + int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + + if (apic_lvt_nmi_mode(lvt0_val)) { + if (!nmi_wd_enabled) { + apic_debug("Receive NMI setting on APIC_LVT0 " + "for cpu %d\n", apic->vcpu->vcpu_id); + apic->vcpu->kvm->arch.vapics_in_nmi_mode++; + } + } else if (nmi_wd_enabled) + apic->vcpu->kvm->arch.vapics_in_nmi_mode--; +} + static void apic_mmio_write(struct kvm_io_device *this, gpa_t address, int len, const void *data) { @@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_ICR2, val & 0xff000000); break; + case APIC_LVT0: + apic_manage_nmi_watchdog(apic, val); case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: - case APIC_LVT0: case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ @@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) return 0; } -static int __inject_apic_timer_irq(struct kvm_lapic *apic) +static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) +{ + u32 reg = apic_get_reg(apic, lvt_type); + int vector, mode, trig_mode; + + if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { + vector = reg & APIC_VECTOR_MASK; + mode = reg & APIC_MODE_MASK; + trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; + return __apic_accept_irq(apic, mode, vector, 1, trig_mode); + } + return 0; +} + +void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) { - int vector; + struct kvm_lapic *apic = vcpu->arch.apic; - vector = apic_lvt_vector(apic, APIC_LVTT); - return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); + if (apic) + kvm_apic_local_deliver(apic, APIC_LVT0); } static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) @@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && apic_lvt_enabled(apic, APIC_LVTT) && - atomic_read(&apic->timer.pending) > 0) { - if (__inject_apic_timer_irq(apic)) + if (apic && atomic_read(&apic->timer.pending) > 0) { + if (kvm_apic_local_deliver(apic, APIC_LVTT)) atomic_dec(&apic->timer.pending); } } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 410ddbc1aa2e..83f11c7474a1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -17,7 +17,6 @@ * */ -#include "vmx.h" #include "mmu.h" #include <linux/kvm_host.h> @@ -33,6 +32,7 @@ #include <asm/page.h> #include <asm/cmpxchg.h> #include <asm/io.h> +#include <asm/vmx.h> /* * When setting this variable to true it enables Two-Dimensional-Paging @@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; +static u64 __read_mostly shadow_mt_mask; void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; + shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count += 1; } @@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) { int *write_count; - write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); + gfn = unalias_gfn(kvm, gfn); + write_count = slot_largepage_idx(gfn, + gfn_to_memslot_unaliased(kvm, gfn)); *write_count -= 1; WARN_ON(*write_count < 0); } static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + struct kvm_memory_slot *slot; int *largepage_idx; + gfn = unalias_gfn(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); if (slot) { largepage_idx = slot_largepage_idx(gfn, slot); return *largepage_idx; @@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) return NULL; } -static void rmap_write_protect(struct kvm *kvm, u64 gfn) +static int rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; u64 *spte; @@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) spte = rmap_next(kvm, rmapp, spte); } - if (write_protected) - kvm_flush_remote_tlbs(kvm); + return write_protected; } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) @@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&sp->oos_link); ASSERT(is_empty_shadow_page(sp->spt)); - sp->slot_bitmap = 0; + bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; + sp->global = 1; sp->parent_pte = parent_pte; --vcpu->kvm->arch.n_free_mmu_pages; return sp; @@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) struct kvm_mmu_page *sp = page_header(__pa(spte)); index = spte - sp->spt; - __set_bit(index, sp->unsync_child_bitmap); - sp->unsync_children = 1; + if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) + sp->unsync_children++; + WARN_ON(!sp->unsync_children); } static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) @@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - sp->unsync_children = 1; kvm_mmu_update_parents_unsync(sp); return 1; } @@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { } +#define KVM_PAGE_ARRAY_NR 16 + +struct kvm_mmu_pages { + struct mmu_page_and_offset { + struct kvm_mmu_page *sp; + unsigned int idx; + } page[KVM_PAGE_ARRAY_NR]; + unsigned int nr; +}; + #define for_each_unsync_children(bitmap, idx) \ for (idx = find_first_bit(bitmap, 512); \ idx < 512; \ idx = find_next_bit(bitmap, 512, idx+1)) -static int mmu_unsync_walk(struct kvm_mmu_page *sp, - struct kvm_unsync_walk *walker) +int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, + int idx) { - int i, ret; + int i; - if (!sp->unsync_children) - return 0; + if (sp->unsync) + for (i=0; i < pvec->nr; i++) + if (pvec->page[i].sp == sp) + return 0; + + pvec->page[pvec->nr].sp = sp; + pvec->page[pvec->nr].idx = idx; + pvec->nr++; + return (pvec->nr == KVM_PAGE_ARRAY_NR); +} + +static int __mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + int i, ret, nr_unsync_leaf = 0; for_each_unsync_children(sp->unsync_child_bitmap, i) { u64 ent = sp->spt[i]; - if (is_shadow_present_pte(ent)) { + if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { struct kvm_mmu_page *child; child = page_header(ent & PT64_BASE_ADDR_MASK); if (child->unsync_children) { - ret = mmu_unsync_walk(child, walker); - if (ret) + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; + + ret = __mmu_unsync_walk(child, pvec); + if (!ret) + __clear_bit(i, sp->unsync_child_bitmap); + else if (ret > 0) + nr_unsync_leaf += ret; + else return ret; - __clear_bit(i, sp->unsync_child_bitmap); } if (child->unsync) { - ret = walker->entry(child, walker); - __clear_bit(i, sp->unsync_child_bitmap); - if (ret) - return ret; + nr_unsync_leaf++; + if (mmu_pages_add(pvec, child, i)) + return -ENOSPC; } } } @@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) sp->unsync_children = 0; - return 0; + return nr_unsync_leaf; +} + +static int mmu_unsync_walk(struct kvm_mmu_page *sp, + struct kvm_mmu_pages *pvec) +{ + if (!sp->unsync_children) + return 0; + + mmu_pages_add(pvec, sp, 0); + return __mmu_unsync_walk(sp, pvec); } static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) @@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } +static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + list_del(&sp->oos_link); + --kvm->stat.mmu_unsync_global; +} + static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; + if (sp->global) + kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 1; } - rmap_write_protect(vcpu->kvm, sp->gfn); + if (rmap_write_protect(vcpu->kvm, sp->gfn)) + kvm_flush_remote_tlbs(vcpu->kvm); kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); @@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) return 0; } -struct sync_walker { - struct kvm_vcpu *vcpu; - struct kvm_unsync_walk walker; +struct mmu_page_path { + struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; + unsigned int idx[PT64_ROOT_LEVEL-1]; }; -static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +#define for_each_sp(pvec, sp, parents, i) \ + for (i = mmu_pages_next(&pvec, &parents, -1), \ + sp = pvec.page[i].sp; \ + i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ + i = mmu_pages_next(&pvec, &parents, i)) + +int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, + int i) { - struct sync_walker *sync_walk = container_of(walk, struct sync_walker, - walker); - struct kvm_vcpu *vcpu = sync_walk->vcpu; + int n; - kvm_sync_page(vcpu, sp); - return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); + for (n = i+1; n < pvec->nr; n++) { + struct kvm_mmu_page *sp = pvec->page[n].sp; + + if (sp->role.level == PT_PAGE_TABLE_LEVEL) { + parents->idx[0] = pvec->page[n].idx; + return n; + } + + parents->parent[sp->role.level-2] = sp; + parents->idx[sp->role.level-1] = pvec->page[n].idx; + } + + return n; } -static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +void mmu_pages_clear_parents(struct mmu_page_path *parents) { - struct sync_walker walker = { - .walker = { .entry = mmu_sync_fn, }, - .vcpu = vcpu, - }; + struct kvm_mmu_page *sp; + unsigned int level = 0; + + do { + unsigned int idx = parents->idx[level]; + + sp = parents->parent[level]; + if (!sp) + return; + + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); + level++; + } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); +} + +static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, + struct mmu_page_path *parents, + struct kvm_mmu_pages *pvec) +{ + parents->parent[parent->role.level-1] = NULL; + pvec->nr = 0; +} + +static void mmu_sync_children(struct kvm_vcpu *vcpu, + struct kvm_mmu_page *parent) +{ + int i; + struct kvm_mmu_page *sp; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + int protected = 0; - while (mmu_unsync_walk(sp, &walker.walker)) + for_each_sp(pages, sp, parents, i) + protected |= rmap_write_protect(vcpu->kvm, sp->gfn); + + if (protected) + kvm_flush_remote_tlbs(vcpu->kvm); + + for_each_sp(pages, sp, parents, i) { + kvm_sync_page(vcpu, sp); + mmu_pages_clear_parents(&parents); + } cond_resched_lock(&vcpu->kvm->mmu_lock); + kvm_mmu_pages_init(parent, &parents, &pages); + } } static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, @@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp->role = role; hlist_add_head(&sp->hash_link, bucket); if (!metaphysical) { - rmap_write_protect(vcpu->kvm, gfn); + if (rmap_write_protect(vcpu->kvm, gfn)) + kvm_flush_remote_tlbs(vcpu->kvm); account_shadowed(vcpu->kvm, gfn); } if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) @@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker, if (level == PT32E_ROOT_LEVEL) { shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; shadow_addr &= PT64_BASE_ADDR_MASK; + if (!shadow_addr) + return 1; --level; } @@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) } } -struct zap_walker { - struct kvm_unsync_walk walker; - struct kvm *kvm; - int zapped; -}; - -static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) +static int mmu_zap_unsync_children(struct kvm *kvm, + struct kvm_mmu_page *parent) { - struct zap_walker *zap_walk = container_of(walk, struct zap_walker, - walker); - kvm_mmu_zap_page(zap_walk->kvm, sp); - zap_walk->zapped = 1; - return 0; -} + int i, zapped = 0; + struct mmu_page_path parents; + struct kvm_mmu_pages pages; -static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - struct zap_walker walker = { - .walker = { .entry = mmu_zap_fn, }, - .kvm = kvm, - .zapped = 0, - }; - - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (parent->role.level == PT_PAGE_TABLE_LEVEL) return 0; - mmu_unsync_walk(sp, &walker.walker); - return walker.zapped; + + kvm_mmu_pages_init(parent, &parents, &pages); + while (mmu_unsync_walk(parent, &pages)) { + struct kvm_mmu_page *sp; + + for_each_sp(pages, sp, parents, i) { + kvm_mmu_zap_page(kvm, sp); + mmu_pages_clear_parents(&parents); + } + zapped += pages.nr; + kvm_mmu_pages_init(parent, &parents, &pages); + } + + return zapped; } static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); struct kvm_mmu_page *sp = page_header(__pa(pte)); - __set_bit(slot, &sp->slot_bitmap); + __set_bit(slot, sp->slot_bitmap); } static void mmu_convert_notrap(struct kvm_mmu_page *sp) @@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) return page; } +/* + * The function is based on mtrr_type_lookup() in + * arch/x86/kernel/cpu/mtrr/generic.c + */ +static int get_mtrr_type(struct mtrr_state_type *mtrr_state, + u64 start, u64 end) +{ + int i; + u64 base, mask; + u8 prev_match, curr_match; + int num_var_ranges = KVM_NR_VAR_MTRR; + + if (!mtrr_state->enabled) + return 0xFF; + + /* Make end inclusive end, instead of exclusive */ + end--; + + /* Look in fixed ranges. Just return the type as per start */ + if (mtrr_state->have_fixed && (start < 0x100000)) { + int idx; + + if (start < 0x80000) { + idx = 0; + idx += (start >> 16); + return mtrr_state->fixed_ranges[idx]; + } else if (start < 0xC0000) { + idx = 1 * 8; + idx += ((start - 0x80000) >> 14); + return mtrr_state->fixed_ranges[idx]; + } else if (start < 0x1000000) { + idx = 3 * 8; + idx += ((start - 0xC0000) >> 12); + return mtrr_state->fixed_ranges[idx]; + } + } + + /* + * Look in variable ranges + * Look of multiple ranges matching this address and pick type + * as per MTRR precedence + */ + if (!(mtrr_state->enabled & 2)) + return mtrr_state->def_type; + + prev_match = 0xFF; + for (i = 0; i < num_var_ranges; ++i) { + unsigned short start_state, end_state; + + if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) + continue; + + base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + + (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); + mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + + (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); + + start_state = ((start & mask) == (base & mask)); + end_state = ((end & mask) == (base & mask)); + if (start_state != end_state) + return 0xFE; + + if ((start & mask) != (base & mask)) + continue; + + curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; + if (prev_match == 0xFF) { + prev_match = curr_match; + continue; + } + + if (prev_match == MTRR_TYPE_UNCACHABLE || + curr_match == MTRR_TYPE_UNCACHABLE) + return MTRR_TYPE_UNCACHABLE; + + if ((prev_match == MTRR_TYPE_WRBACK && + curr_match == MTRR_TYPE_WRTHROUGH) || + (prev_match == MTRR_TYPE_WRTHROUGH && + curr_match == MTRR_TYPE_WRBACK)) { + prev_match = MTRR_TYPE_WRTHROUGH; + curr_match = MTRR_TYPE_WRTHROUGH; + } + + if (prev_match != curr_match) + return MTRR_TYPE_UNCACHABLE; + } + + if (prev_match != 0xFF) + return prev_match; + + return mtrr_state->def_type; +} + +static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + u8 mtrr; + + mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, + (gfn << PAGE_SHIFT) + PAGE_SIZE); + if (mtrr == 0xfe || mtrr == 0xff) + mtrr = MTRR_TYPE_WRBACK; + return mtrr; +} + static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { unsigned index; @@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (s->role.word != sp->role.word) return 1; } - kvm_mmu_mark_parents_unsync(vcpu, sp); ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; + + if (sp->global) { + list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); + ++vcpu->kvm->stat.mmu_unsync_global; + } else + kvm_mmu_mark_parents_unsync(vcpu, sp); + mmu_convert_notrap(sp); return 0; } @@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - gfn_t gfn, pfn_t pfn, bool speculative, + int global, gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; + u64 mt_mask = shadow_mt_mask; + struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); + + if (!(vcpu->arch.cr4 & X86_CR4_PGE)) + global = 0; + if (!global && sp->global) { + sp->global = 0; + if (sp->unsync) { + kvm_unlink_unsync_global(vcpu->kvm, sp); + kvm_mmu_mark_parents_unsync(vcpu, sp); + } + } + /* * We don't set the accessed bit, since we sometimes want to see * whether the guest actually used the pte (in order to detect @@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; + if (mt_mask) { + mt_mask = get_memory_type(vcpu, gfn) << + kvm_x86_ops->get_mt_mask_shift(); + spte |= mt_mask; + } spte |= (u64)pfn << PAGE_SHIFT; @@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= PT_WRITABLE_MASK; + /* + * Optimization: for pte sync, if spte was writable the hash + * lookup is unnecessary (and expensive). Write protection + * is responsibility of mmu_get_page / kvm_sync_page. + * Same reasoning can be applied to dirty page accounting. + */ + if (!can_unsync && is_writeble_pte(*shadow_pte)) + goto set_pte; + if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { pgprintk("%s: found shadow page for %lx, marking ro\n", __func__, gfn); @@ -1495,8 +1746,8 @@ set_pte: static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, gfn_t gfn, - pfn_t pfn, bool speculative) + int *ptwrite, int largepage, int global, + gfn_t gfn, pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, gfn, pfn, speculative, true)) { + dirty, largepage, global, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, 0, walk->write, 1, &walk->pt_write, - walk->largepage, gfn, walk->pfn, false); + walk->largepage, 0, gfn, walk->pfn, false); ++vcpu->stat.pf_fixed; return 1; } @@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } +static void mmu_sync_global(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_page *sp, *n; + + list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) + kvm_sync_page(vcpu, sp); +} + void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } +void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->kvm->mmu_lock); + mmu_sync_global(vcpu); + spin_unlock(&vcpu->kvm->mmu_lock); +} + static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; @@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) + const u8 *new, int bytes, + bool guest_initiated) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *sp; @@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, "pre pte write"); - if (gfn == vcpu->arch.last_pt_write_gfn - && !last_updated_pte_accessed(vcpu)) { - ++vcpu->arch.last_pt_write_count; - if (vcpu->arch.last_pt_write_count >= 3) - flooded = 1; - } else { - vcpu->arch.last_pt_write_gfn = gfn; - vcpu->arch.last_pt_write_count = 1; - vcpu->arch.last_pte_updated = NULL; + if (guest_initiated) { + if (gfn == vcpu->arch.last_pt_write_gfn + && !last_updated_pte_accessed(vcpu)) { + ++vcpu->arch.last_pt_write_count; + if (vcpu->arch.last_pt_write_count >= 3) + flooded = 1; + } else { + vcpu->arch.last_pt_write_gfn = gfn; + vcpu->arch.last_pt_write_count = 1; + vcpu->arch.last_pte_updated = NULL; + } } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; @@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - spin_lock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.invlpg(vcpu, gva); - spin_unlock(&vcpu->kvm->mmu_lock); kvm_mmu_flush_tlb(vcpu); ++vcpu->stat.invlpg; } @@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) int i; u64 *pt; - if (!test_bit(slot, &sp->slot_bitmap)) + if (!test_bit(slot, sp->slot_bitmap)) continue; pt = sp->spt; @@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) if (sp->role.metaphysical) continue; - slot = gfn_to_memslot(vcpu->kvm, sp->gfn); gfn = unalias_gfn(vcpu->kvm, sp->gfn); + slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); rmapp = &slot->rmap[gfn - slot->base_gfn]; if (*rmapp) printk(KERN_ERR "%s: (%s) shadow page has writable" diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 84eee43bbe74..9fd78b6e17ad 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -82,6 +82,7 @@ struct shadow_walker { int *ptwrite; pfn_t pfn; u64 *sptep; + gpa_t pte_gpa; }; static gfn_t gpte_to_gfn(pt_element_t gpte) @@ -222,7 +223,7 @@ walk: if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); + kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); walker->ptes[walker->level - 1] = pte; } @@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, return; kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, - gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), + gpte & PT_DIRTY_MASK, NULL, largepage, + gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), pfn, true); } @@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, sw->user_fault, sw->write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, - sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, - false); + sw->ptwrite, sw->largepage, + gw->ptes[gw->level-1] & PT_GLOBAL_MASK, + gw->gfn, sw->pfn, false); sw->sptep = sptep; return 1; } @@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, struct kvm_vcpu *vcpu, u64 addr, u64 *sptep, int level) { + struct shadow_walker *sw = + container_of(_sw, struct shadow_walker, walker); - if (level == PT_PAGE_TABLE_LEVEL) { - if (is_shadow_present_pte(*sptep)) + /* FIXME: properly handle invlpg on large guest pages */ + if (level == PT_PAGE_TABLE_LEVEL || + ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + sw->pte_gpa = (sp->gfn << PAGE_SHIFT); + sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); + + if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); + if (is_large_pte(*sptep)) + --vcpu->kvm->stat.lpages; + } set_shadow_pte(sptep, shadow_trap_nonpresent_pte); return 1; } @@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { + pt_element_t gpte; struct shadow_walker walker = { .walker = { .entry = FNAME(shadow_invlpg_entry), }, + .pte_gpa = -1, }; + spin_lock(&vcpu->kvm->mmu_lock); walk_shadow(&walker.walker, vcpu, gva); + spin_unlock(&vcpu->kvm->mmu_lock); + if (walker.pte_gpa == -1) + return; + if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, + sizeof(pt_element_t))) + return; + if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { + if (mmu_topup_memory_caches(vcpu)) + return; + kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, + sizeof(pt_element_t), 0); + } } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) @@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gfn, + is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c4ce657d963..1452851ae258 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -28,6 +28,8 @@ #include <asm/desc.h> +#include <asm/virtext.h> + #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static int has_svm(void) { - uint32_t eax, ebx, ecx, edx; - - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - printk(KERN_INFO "has_svm: not amd\n"); - return 0; - } + const char *msg; - cpuid(0x80000000, &eax, &ebx, &ecx, &edx); - if (eax < SVM_CPUID_FUNC) { - printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); + if (!cpu_has_svm(&msg)) { + printk(KERN_INFO "has_svn: %s\n", msg); return 0; } - cpuid(0x80000001, &eax, &ebx, &ecx, &edx); - if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { - printk(KERN_DEBUG "has_svm: svm not available\n"); - return 0; - } return 1; } static void svm_hardware_disable(void *garbage) { - uint64_t efer; - - wrmsrl(MSR_VM_HSAVE_PA, 0); - rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); + cpu_svm_disable(); } static void svm_hardware_enable(void *garbage) @@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; + + /* + * SVM always stores 0 for the 'G' bit in the CS selector in + * the VMCB on a VMEXIT. This hurts cross-vendor migration: + * Intel's VMENTRY has a check on the 'G' bit. + */ + if (seg == VCPU_SREG_CS) + var->g = s->limit > 0xfffff; + + /* + * Work around a bug where the busy flag in the tr selector + * isn't exposed + */ + if (seg == VCPU_SREG_TR) + var->type |= 0x2; + var->unusable = !var->present; } @@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; + skip_emulated_instruction(&svm->vcpu); return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } @@ -1912,6 +1916,11 @@ static int get_npt_level(void) #endif } +static int svm_get_mt_mask_shift(void) +{ + return 0; +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, + .get_mt_mask_shift = svm_get_mt_mask_shift, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a4018b01e1f9..6259d7467648 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -16,7 +16,6 @@ */ #include "irq.h" -#include "vmx.h" #include "mmu.h" #include <linux/kvm_host.h> @@ -31,6 +30,8 @@ #include <asm/io.h> #include <asm/desc.h> +#include <asm/vmx.h> +#include <asm/virtext.h> #define __ex(x) __kvm_handle_fault_on_reboot(x) @@ -90,6 +91,11 @@ struct vcpu_vmx { } rmode; int vpid; bool emulation_required; + + /* Support for vnmi-less CPUs */ + int soft_vnmi_blocked; + ktime_t entry_time; + s64 vnmi_blocked_time; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -122,7 +128,7 @@ static struct vmcs_config { u32 vmentry_ctrl; } vmcs_config; -struct vmx_capability { +static struct vmx_capability { u32 ept; u32 vpid; } vmx_capability; @@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); break; + case MSR_IA32_CR_PAT: + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + vmcs_write64(GUEST_IA32_PAT, data); + vcpu->arch.pat = data; + break; + } + /* Otherwise falls through to kvm_set_msr_common */ default: vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); @@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) static __init int cpu_has_kvm_support(void) { - unsigned long ecx = cpuid_ecx(1); - return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ + return cpu_has_vmx(); } static __init int vmx_disabled_by_bios(void) @@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void) __vcpu_clear(vmx); } -static void hardware_disable(void *garbage) + +/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() + * tricks. + */ +static void kvm_cpu_vmxoff(void) { - vmclear_local_vcpus(); asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); write_cr4(read_cr4() & ~X86_CR4_VMXE); } +static void hardware_disable(void *garbage) +{ + vmclear_local_vcpus(); + kvm_cpu_vmxoff(); +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) #ifdef CONFIG_X86_64 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; #endif - opt = 0; + opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; - min = opt = 0; + min = 0; + opt = VM_ENTRY_LOAD_IA32_PAT; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) */ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - u32 host_sysenter_cs; + u32 host_sysenter_cs, msr_low, msr_high; u32 junk; + u64 host_pat; unsigned long a; struct descriptor_table dt; int i; @@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) rdmsrl(MSR_IA32_SYSENTER_EIP, a); vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ + if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + vmcs_write64(HOST_IA32_PAT, host_pat); + } + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); + host_pat = msr_low | ((u64) msr_high << 32); + /* Write the default value follow host pat */ + vmcs_write64(GUEST_IA32_PAT, host_pat); + /* Keep arch.pat sync with GUEST_IA32_PAT */ + vmx->vcpu.arch.pat = host_pat; + } + for (i = 0; i < NR_VMX_MSR; ++i) { u32 index = vmx_msr_index[i]; u32 data_low, data_high; @@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.rmode.active = 0; + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -2335,6 +2374,29 @@ out: return ret; } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void enable_nmi_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + if (!cpu_has_virtual_nmis()) { + enable_irq_window(vcpu); + return; + } + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!cpu_has_virtual_nmis()) { + /* + * Tracking the NMI-blocked state in software is built upon + * finding the next open IRQ window. This, in turn, depends on + * well-behaving guests: They have to keep IRQs disabled at + * least as long as the NMI handler runs. Otherwise we may + * cause NMI nesting, maybe breaking the guest. But as this is + * highly unlikely, we can live with the residual risk. + */ + vmx->soft_vnmi_blocked = 1; + vmx->vnmi_blocked_time = 0; + } + + ++vcpu->stat.nmi_injections; + if (vcpu->arch.rmode.active) { + vmx->rmode.irq.pending = true; + vmx->rmode.irq.vector = NMI_VECTOR; + vmx->rmode.irq.rip = kvm_rip_read(vcpu); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + NMI_VECTOR | INTR_TYPE_SOFT_INTR | + INTR_INFO_VALID_MASK); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); + return; + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } +static void vmx_update_window_states(struct kvm_vcpu *vcpu) +{ + u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + + vcpu->arch.nmi_window_open = + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_NMI)); + if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) + vcpu->arch.nmi_window_open = 0; + + vcpu->arch.interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(guest_intr & (GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_MOV_SS))); +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->arch.irq_summary); @@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) kvm_queue_interrupt(vcpu, irq); } - static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 cpu_based_vm_exec_control; - - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + vmx_update_window_states(vcpu); - if (vcpu->arch.interrupt_window_open && - vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) - kvm_do_inject_irq(vcpu); + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); + return; + } + } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (vcpu->arch.irq_summary + || kvm_run->request_interrupt_window) + enable_irq_window(vcpu); + return; + } - if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + if (vcpu->arch.interrupt_window_open) { + if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) + kvm_do_inject_irq(vcpu); - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + if (vcpu->arch.interrupt.pending) + vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); + } if (!vcpu->arch.interrupt_window_open && (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - /* - * Interrupts blocked. Wait for unblock. - */ - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - else - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + enable_irq_window(vcpu); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; struct kvm_userspace_memory_region tss_mem = { - .slot = 8, + .slot = TSS_PRIVATE_MEMSLOT, .guest_phys_addr = addr, .memory_size = PAGE_SIZE * 3, .flags = 0, @@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ if (is_no_device(intr_info)) { @@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; + skip_emulated_instruction(vcpu); return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } @@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); KVMTRACE_0D(PEND_INTR, vcpu, handler); + ++vcpu->stat.irq_window_exits; /* * If the user space waits to inject interrupts, exit as soon as @@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, if (kvm_run->request_interrupt_window && !vcpu->arch.irq_summary) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; - ++vcpu->stat.irq_window_exits; return 0; } return 1; @@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; int reason; @@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; + if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && + (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) + == INTR_TYPE_NMI_INTR) { + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } tss_selector = exit_qualification; return kvm_task_switch(vcpu, tss_selector, reason); @@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, while (!guest_state_valid(vcpu)) { err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); - switch (err) { - case EMULATE_DONE: - break; - case EMULATE_DO_MMIO: - kvm_report_emulation_failure(vcpu, "mmio"); - /* TODO: Handle MMIO */ - return; - default: - kvm_report_emulation_failure(vcpu, "emulation failure"); - return; + if (err == EMULATE_DO_MMIO) + break; + + if (err != EMULATE_DONE) { + kvm_report_emulation_failure(vcpu, "emulation failure"); + return; } if (signal_pending(current)) @@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, local_irq_disable(); preempt_disable(); - /* Guest state should be valid now, no more emulation should be needed */ - vmx->emulation_required = 0; + /* Guest state should be valid now except if we need to + * emulate an MMIO */ + if (guest_state_valid(vcpu)) + vmx->emulation_required = 0; } /* @@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); + /* If we need to emulate an MMIO from handle_invalid_guest_state + * we just return 0 */ + if (vmx->emulation_required && emulate_invalid_guest_state) + return 0; + /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ if (vm_need_ept() && is_paging(vcpu)) { @@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if ((vectoring_info & VECTORING_INFO_VALID_MASK) && (exit_reason != EXIT_REASON_EXCEPTION_NMI && - exit_reason != EXIT_REASON_EPT_VIOLATION)) - printk(KERN_WARNING "%s: unexpected, valid vectoring info and " - "exit reason is 0x%x\n", __func__, exit_reason); + exit_reason != EXIT_REASON_EPT_VIOLATION && + exit_reason != EXIT_REASON_TASK_SWITCH)) + printk(KERN_WARNING "%s: unexpected, valid vectoring info " + "(0x%x) and exit reason is 0x%x\n", + __func__, vectoring_info, exit_reason); + + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { + if (vcpu->arch.interrupt_window_open) { + vmx->soft_vnmi_blocked = 0; + vcpu->arch.nmi_window_open = 1; + } else if (vmx->vnmi_blocked_time > 1000000000LL && + vcpu->arch.nmi_pending) { + /* + * This CPU don't support us in finding the end of an + * NMI-blocked window if the guest runs with IRQs + * disabled. So we pull the trigger after 1 s of + * futile waiting, but inform the user about this. + */ + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " + "state on VCPU %d after 1 s timeout\n", + __func__, vcpu->vcpu_id); + vmx->soft_vnmi_blocked = 0; + vmx->vcpu.arch.nmi_window_open = 1; + } + } + if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); @@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); } -static void enable_irq_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static void enable_nmi_window(struct kvm_vcpu *vcpu) -{ - u32 cpu_based_vm_exec_control; - - if (!cpu_has_virtual_nmis()) - return; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); -} - -static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return !(guest_intr & (GUEST_INTR_STATE_NMI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)); -} - -static int vmx_irq_enabled(struct kvm_vcpu *vcpu) -{ - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_STI)) && - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); -} - -static void enable_intr_window(struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); -} - static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) if (unblock_nmi && vector != DF_VECTOR) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); - } + } else if (unlikely(vmx->soft_vnmi_blocked)) + vmx->vnmi_blocked_time += + ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); idt_vectoring_info = vmx->idt_vectoring_info; idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; @@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { update_tpr_threshold(vcpu); - if (cpu_has_virtual_nmis()) { - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vmx_nmi_enabled(vcpu)) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_intr_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - enable_intr_window(vcpu); + vmx_update_window_states(vcpu); + + if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vcpu->arch.nmi_window_open) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + } else { + enable_nmi_window(vcpu); return; } } + if (vcpu->arch.nmi_injected) { + vmx_inject_nmi(vcpu); + if (vcpu->arch.nmi_pending) + enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); + return; + } if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vmx_irq_enabled(vcpu)) + if (vcpu->arch.interrupt_window_open) kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); else enable_irq_window(vcpu); @@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (vcpu->arch.interrupt.pending) { vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); + if (kvm_cpu_has_interrupt(vcpu)) + enable_irq_window(vcpu); } } @@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 intr_info; + /* Record the guest's net vcpu time for enforced NMI injections. */ + if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) + vmx->entry_time = ktime_get(); + /* Handle invalid guest state instead of entering VMX */ if (vmx->emulation_required && emulate_invalid_guest_state) { handle_invalid_guest_state(vcpu, kvm_run); @@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vcpu->arch.interrupt_window_open = - (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; + vmx_update_window_states(vcpu); asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; @@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && + if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && (intr_info & INTR_INFO_VALID_MASK)) { KVMTRACE_0D(NMI, vcpu, handler); asm("int $2"); @@ -3455,6 +3571,11 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } +static int vmx_get_mt_mask_shift(void) +{ + return VMX_EPT_MT_EPTE_SHIFT; +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, + .get_mt_mask_shift = vmx_get_mt_mask_shift, }; static int __init vmx_init(void) @@ -3566,10 +3688,10 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK); + VMX_EPT_EXECUTABLE_MASK, + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); kvm_enable_tdp(); } else kvm_disable_tdp(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1f8ff2f1fa2..0e6aa8141dcd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -39,6 +39,7 @@ #include <asm/uaccess.h> #include <asm/msr.h> #include <asm/desc.h> +#include <asm/mtrr.h> #define MAX_IO_MSRS 256 #define CR0_RESERVED_BITS \ @@ -86,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, + { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -93,6 +95,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation", VCPU_STAT(insn_emulation) }, { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, + { "nmi_injections", VCPU_STAT(nmi_injections) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -101,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -312,6 +316,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -355,6 +360,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) } kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; + kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -449,7 +455,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, - MSR_IA32_PERF_STATUS, + MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT }; static unsigned num_msrs_to_save; @@ -648,10 +654,38 @@ static bool msr_mtrr_valid(unsigned msr) static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - vcpu->arch.mtrr[msr - 0x200] = data; + if (msr == MSR_MTRRdefType) { + vcpu->arch.mtrr_state.def_type = data; + vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; + } else if (msr == MSR_MTRRfix64K_00000) + p[0] = data; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + p[1 + msr - MSR_MTRRfix16K_80000] = data; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + p[3 + msr - MSR_MTRRfix4K_C0000] = data; + else if (msr == MSR_IA32_CR_PAT) + vcpu->arch.pat = data; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pt = data; + } + + kvm_mmu_reset_context(vcpu); return 0; } @@ -747,10 +781,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { + u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; + if (!msr_mtrr_valid(msr)) return 1; - *pdata = vcpu->arch.mtrr[msr - 0x200]; + if (msr == MSR_MTRRdefType) + *pdata = vcpu->arch.mtrr_state.def_type + + (vcpu->arch.mtrr_state.enabled << 10); + else if (msr == MSR_MTRRfix64K_00000) + *pdata = p[0]; + else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) + *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; + else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) + *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; + else if (msr == MSR_IA32_CR_PAT) + *pdata = vcpu->arch.pat; + else { /* Variable MTRRs */ + int idx, is_mtrr_mask; + u64 *pt; + + idx = (msr - 0x200) / 2; + is_mtrr_mask = msr - 0x200 - 2 * idx; + if (!is_mtrr_mask) + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; + else + pt = + (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; + *pdata = *pt; + } + return 0; } @@ -903,7 +964,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: case KVM_CAP_HLT: case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: - case KVM_CAP_USER_MEMORY: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: case KVM_CAP_CLOCKSOURCE: @@ -1188,6 +1248,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, int t, times = entry->eax & 0xff; entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times && *nent < maxnent; ++t) { do_cpuid_1_ent(&entry[t], function, 0); entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; @@ -1218,7 +1279,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; *nent < maxnent; ++i) { - level_type = entry[i - 1].ecx & 0xff; + level_type = entry[i - 1].ecx & 0xff00; if (!level_type) break; do_cpuid_1_ent(&entry[i], function, i); @@ -1318,6 +1379,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_inject_nmi(vcpu); + vcpu_put(vcpu); + + return 0; +} + static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -1377,6 +1447,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } + case KVM_NMI: { + r = kvm_vcpu_ioctl_nmi(vcpu); + if (r) + goto out; + r = 0; + break; + } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -1968,7 +2045,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) return 0; - kvm_mmu_pte_write(vcpu, gpa, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); return 1; } @@ -2404,8 +2481,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, val = kvm_register_read(vcpu, VCPU_REGS_RAX); memcpy(vcpu->arch.pio_data, &val, 4); - kvm_x86_ops->skip_emulated_instruction(vcpu); - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); @@ -2541,7 +2616,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); return 0; out: @@ -2729,7 +2804,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; /* when no next entry is found, the current entry[i] is reselected */ - for (j = i + 1; j == i; j = (j + 1) % nent) { + for (j = i + 1; ; j = (j + 1) % nent) { struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; if (ej->function == e->function) { ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; @@ -2973,7 +3048,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_x86_ops->vcpu_reset(vcpu); + r = kvm_arch_vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -3275,9 +3350,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, kvm_desct->padding = 0; } -static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, - u16 selector, - struct descriptor_table *dtable) +static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) { if (selector & 1 << 2) { struct kvm_segment kvm_seg; @@ -3302,7 +3377,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descritptor_dtable(vcpu, selector, &dtable); + get_segment_descriptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) { kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); @@ -3321,7 +3396,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct descriptor_table dtable; u16 index = selector >> 3; - get_segment_descritptor_dtable(vcpu, selector, &dtable); + get_segment_descriptor_dtable(vcpu, selector, &dtable); if (dtable.limit < index * 8 + 7) return 1; @@ -3900,6 +3975,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* We do fxsave: this must be aligned. */ BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); + vcpu->arch.mtrr_state.have_fixed = 1; vcpu_load(vcpu); r = kvm_arch_vcpu_reset(vcpu); if (r == 0) @@ -3925,6 +4001,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = false; + return kvm_x86_ops->vcpu_reset(vcpu); } @@ -4012,6 +4091,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -4048,8 +4128,8 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - kvm_iommu_unmap_guest(kvm); kvm_free_all_assigned_devices(kvm); + kvm_iommu_unmap_guest(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); @@ -4127,7 +4207,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE - || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; + || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED + || vcpu->arch.nmi_pending; } static void vcpu_kick_intr(void *info) diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ea051173b0da..d174db7a3370 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -58,6 +58,7 @@ #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ #define SrcImm (5<<4) /* Immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ +#define SrcOne (7<<4) /* Implied '1' */ #define SrcMask (7<<4) /* Generic ModRM decode. */ #define ModRM (1<<7) @@ -70,17 +71,23 @@ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ #define GroupMask 0xff /* Group number stored in bits 0:7 */ +/* Source 2 operand type */ +#define Src2None (0<<29) +#define Src2CL (1<<29) +#define Src2ImmByte (2<<29) +#define Src2One (3<<29) +#define Src2Mask (7<<29) enum { Group1_80, Group1_81, Group1_82, Group1_83, Group1A, Group3_Byte, Group3, Group4, Group5, Group7, }; -static u16 opcode_table[256] = { +static u32 opcode_table[256] = { /* 0x00 - 0x07 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, 0, 0, + ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, /* 0x08 - 0x0F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -195,7 +202,7 @@ static u16 opcode_table[256] = { ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, }; -static u16 twobyte_table[256] = { +static u32 twobyte_table[256] = { /* 0x00 - 0x0F */ 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, @@ -230,9 +237,14 @@ static u16 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + DstMem | SrcReg | Src2ImmByte | ModRM, + DstMem | SrcReg | Src2CL | ModRM, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, + DstMem | SrcReg | Src2ImmByte | ModRM, + DstMem | SrcReg | Src2CL | ModRM, + ModRM, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, DstMem | SrcReg | ModRM | BitOp, @@ -253,7 +265,7 @@ static u16 twobyte_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -static u16 group_table[] = { +static u32 group_table[] = { [Group1_80*8] = ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, @@ -297,9 +309,9 @@ static u16 group_table[] = { SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, }; -static u16 group2_table[] = { +static u32 group2_table[] = { [Group7*8] = - SrcNone | ModRM, 0, 0, 0, + SrcNone | ModRM, 0, 0, SrcNone | ModRM, SrcNone | ModRM | DstMem | Mov, 0, SrcMem16 | ModRM | Mov, 0, }; @@ -359,49 +371,48 @@ static u16 group2_table[] = { "andl %"_msk",%"_LO32 _tmp"; " \ "orl %"_LO32 _tmp",%"_sav"; " +#ifdef CONFIG_X86_64 +#define ON64(x) x +#else +#define ON64(x) +#endif + +#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ + do { \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "4", "2") \ + _op _suffix " %"_x"3,%1; " \ + _POST_EFLAGS("0", "4", "2") \ + : "=m" (_eflags), "=m" ((_dst).val), \ + "=&r" (_tmp) \ + : _y ((_src).val), "i" (EFLAGS_MASK)); \ + } while (0) + + /* Raw emulation: instruction has two explicit operands. */ #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 2: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"w %"_wx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _wy ((_src).val), "i" (EFLAGS_MASK)); \ - break; \ - case 4: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"l %"_lx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : _ly ((_src).val), "i" (EFLAGS_MASK)); \ - break; \ - case 8: \ - __emulate_2op_8byte(_op, _src, _dst, \ - _eflags, _qx, _qy); \ - break; \ - } \ + do { \ + unsigned long _tmp; \ + \ + switch ((_dst).bytes) { \ + case 2: \ + ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ + break; \ + case 4: \ + ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ + break; \ + case 8: \ + ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ + break; \ + } \ } while (0) #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ do { \ - unsigned long __tmp; \ + unsigned long _tmp; \ switch ((_dst).bytes) { \ case 1: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"b %"_bx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (__tmp) \ - : _by ((_src).val), "i" (EFLAGS_MASK)); \ + ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ break; \ default: \ __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ @@ -425,71 +436,68 @@ static u16 group2_table[] = { __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ "w", "r", _LO32, "r", "", "r") -/* Instruction has only one explicit operand (no source operand). */ -#define emulate_1op(_op, _dst, _eflags) \ - do { \ - unsigned long _tmp; \ - \ - switch ((_dst).bytes) { \ - case 1: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"b %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 2: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"w %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 4: \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"l %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), \ - "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ - break; \ - case 8: \ - __emulate_1op_8byte(_op, _dst, _eflags); \ - break; \ - } \ +/* Instruction has three operands and one operand is stored in ECX register */ +#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ + do { \ + unsigned long _tmp; \ + _type _clv = (_cl).val; \ + _type _srcv = (_src).val; \ + _type _dstv = (_dst).val; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "5", "2") \ + _op _suffix " %4,%1 \n" \ + _POST_EFLAGS("0", "5", "2") \ + : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ + : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ + ); \ + \ + (_cl).val = (unsigned long) _clv; \ + (_src).val = (unsigned long) _srcv; \ + (_dst).val = (unsigned long) _dstv; \ } while (0) -/* Emulate an instruction with quadword operands (x86/64 only). */ -#if defined(CONFIG_X86_64) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "4", "2") \ - _op"q %"_qx"3,%1; " \ - _POST_EFLAGS("0", "4", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : _qy ((_src).val), "i" (EFLAGS_MASK)); \ +#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 2: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "w", unsigned short); \ + break; \ + case 4: \ + __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "l", unsigned int); \ + break; \ + case 8: \ + ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ + "q", unsigned long)); \ + break; \ + } \ } while (0) -#define __emulate_1op_8byte(_op, _dst, _eflags) \ - do { \ - __asm__ __volatile__ ( \ - _PRE_EFLAGS("0", "3", "2") \ - _op"q %1; " \ - _POST_EFLAGS("0", "3", "2") \ - : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ - : "i" (EFLAGS_MASK)); \ +#define __emulate_1op(_op, _dst, _eflags, _suffix) \ + do { \ + unsigned long _tmp; \ + \ + __asm__ __volatile__ ( \ + _PRE_EFLAGS("0", "3", "2") \ + _op _suffix " %1; " \ + _POST_EFLAGS("0", "3", "2") \ + : "=m" (_eflags), "+m" ((_dst).val), \ + "=&r" (_tmp) \ + : "i" (EFLAGS_MASK)); \ } while (0) -#elif defined(__i386__) -#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) -#define __emulate_1op_8byte(_op, _dst, _eflags) -#endif /* __i386__ */ +/* Instruction has only one explicit operand (no source operand). */ +#define emulate_1op(_op, _dst, _eflags) \ + do { \ + switch ((_dst).bytes) { \ + case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ + case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ + case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ + case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ + } \ + } while (0) /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _size, _eip) \ @@ -1041,6 +1049,33 @@ done_prefixes: c->src.bytes = 1; c->src.val = insn_fetch(s8, 1, c->eip); break; + case SrcOne: + c->src.bytes = 1; + c->src.val = 1; + break; + } + + /* + * Decode and fetch the second source operand: register, memory + * or immediate. + */ + switch (c->d & Src2Mask) { + case Src2None: + break; + case Src2CL: + c->src2.bytes = 1; + c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; + break; + case Src2ImmByte: + c->src2.type = OP_IMM; + c->src2.ptr = (unsigned long *)c->eip; + c->src2.bytes = 1; + c->src2.val = insn_fetch(u8, 1, c->eip); + break; + case Src2One: + c->src2.bytes = 1; + c->src2.val = 1; + break; } /* Decode and fetch the destination operand: register or memory. */ @@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) c->regs[VCPU_REGS_RSP]); } -static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_pop(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; int rc; - rc = ops->read_std(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), - &c->dst.val, c->dst.bytes, ctxt->vcpu); + rc = ops->read_emulated(register_address(c, ss_base(ctxt), + c->regs[VCPU_REGS_RSP]), + &c->src.val, c->src.bytes, ctxt->vcpu); if (rc != 0) return rc; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); + return rc; +} + +static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + c->src.bytes = c->dst.bytes; + rc = emulate_pop(ctxt, ops); + if (rc != 0) + return rc; + c->dst.val = c->src.val; return 0; } @@ -1415,24 +1463,15 @@ special_insn: emulate_1op("dec", c->dst, ctxt->eflags); break; case 0x50 ... 0x57: /* push reg */ - c->dst.type = OP_MEM; - c->dst.bytes = c->op_bytes; - c->dst.val = c->src.val; - register_address_increment(c, &c->regs[VCPU_REGS_RSP], - -c->op_bytes); - c->dst.ptr = (void *) register_address( - c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); + emulate_push(ctxt); break; case 0x58 ... 0x5f: /* pop reg */ pop_instruction: - if ((rc = ops->read_std(register_address(c, ss_base(ctxt), - c->regs[VCPU_REGS_RSP]), c->dst.ptr, - c->op_bytes, ctxt->vcpu)) != 0) + c->src.bytes = c->op_bytes; + rc = emulate_pop(ctxt, ops); + if (rc != 0) goto done; - - register_address_increment(c, &c->regs[VCPU_REGS_RSP], - c->op_bytes); - c->dst.type = OP_NONE; /* Disable writeback. */ + c->dst.val = c->src.val; break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) @@ -1591,7 +1630,9 @@ special_insn: emulate_push(ctxt); break; case 0x9d: /* popf */ + c->dst.type = OP_REG; c->dst.ptr = (unsigned long *) &ctxt->eflags; + c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xa0 ... 0xa1: /* mov */ c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; @@ -1689,7 +1730,9 @@ special_insn: emulate_grp2(ctxt); break; case 0xc3: /* ret */ + c->dst.type = OP_REG; c->dst.ptr = &c->eip; + c->dst.bytes = c->op_bytes; goto pop_instruction; case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ mov: @@ -1778,7 +1821,7 @@ special_insn: c->eip = saved_eip; goto cannot_emulate; } - return 0; + break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; break; @@ -1999,12 +2042,20 @@ twobyte_insn: c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); break; + case 0xa4: /* shld imm8, r, r/m */ + case 0xa5: /* shld cl, r, r/m */ + emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); + break; case 0xab: bts: /* bts */ /* only subword offset */ c->src.val &= (c->dst.bytes << 3) - 1; emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); break; + case 0xac: /* shrd imm8, r, r/m */ + case 0xad: /* shrd cl, r, r/m */ + emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); + break; case 0xae: /* clflush */ break; case 0xb0 ... 0xb1: /* cmpxchg */ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50a779264bb1..a7ed208f81e3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -738,7 +738,7 @@ static void lguest_time_init(void) /* We can't set cpumask in the initializer: damn C limitations! Set it * here and register our timer device. */ - lguest_clockevent.cpumask = cpumask_of_cpu(0); + lguest_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&lguest_clockevent); /* Finally, we unblock the timer interrupt. */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 37b9ae4d44c5..df167f265622 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -133,29 +133,28 @@ void __init time_init_hook(void) **/ void mca_nmi_hook(void) { - /* If I recall correctly, there's a whole bunch of other things that + /* + * If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about * at the moment. */ - - printk("NMI generated from unknown source!\n"); + pr_warning("NMI generated from unknown source!\n"); } #endif static __init int no_ipi_broadcast(char *str) { get_option(&str, &no_broadcast); - printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : - "IPI Broadcast"); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); return 1; } - __setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { - printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : - "Shortcut"); + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); return 0; } diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3624a364b7f3..bc4c7840b2a8 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -42,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { - return cpumask_of_cpu(cpu); + cpus_clear(*retmask); + cpu_set(cpu, *retmask); } static int probe_bigsmp(void) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 7b4e6d0d1690..4ba5ccaa1584 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -87,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -97,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 71a309b122e6..511d7941364f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 2c6d234e0009..2821ffc188b5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -24,7 +24,7 @@ static int probe_summit(void) return 0; } -static cpumask_t vector_allocation_domain(int cpu) +static void vector_allocation_domain(int cpu, cpumask_t *retmask) { /* Careful. Some cpus do not strictly honor the set of cpus * specified in the interrupt destination when using lowest @@ -34,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu) * deliver interrupts to the wrong hyperthread when only one * hyperthread was specified in the interrupt desitination. */ - cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; - return domain; + *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; } struct genapic apic_summit = APIC_INIT("summit", probe_summit); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 52145007bd7e..a5bc05492b1e 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1; /* Used for the invalidate map that's also checked in the spinlock */ static volatile unsigned long smp_invalidate_needed; -/* Bitmask of currently online CPUs - used by setup.c for - /proc/cpuinfo, visible externally but still physical */ -cpumask_t cpu_online_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_online_map); - /* Bitmask of CPUs present in the system - exported by i386_syms.c, used * by scheduler but indexed physically */ cpumask_t phys_cpu_present_map = CPU_MASK_NONE; @@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ cpumask_t cpu_callin_map = CPU_MASK_NONE; cpumask_t cpu_callout_map = CPU_MASK_NONE; -cpumask_t cpu_possible_map = CPU_MASK_NONE; -EXPORT_SYMBOL(cpu_possible_map); /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -679,7 +672,7 @@ void __init smp_boot_cpus(void) /* loop over all the extended VIC CPUs and boot them. The * Quad CPUs must be bootstrapped by their extended VIC cpu */ - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) continue; do_boot_cpu(i); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8655b5bb0963..f99a6c6c432e 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -435,8 +435,12 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init() do { } while (0) +static inline void permanent_kmaps_init(pgd_t *pgd_base) +{ +} +static inline void set_highmem_pages_init(void) +{ +} #endif /* CONFIG_HIGHMEM */ void __init native_pagetable_setup_start(pgd_t *base) diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index cebcbf152d46..71a14f89f89e 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -278,7 +278,7 @@ void __init numa_init_array(void) int rr, i; rr = first_node(node_online_map); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); @@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) memnodemap[0] = 0; node_set_online(0); node_set(0, node_possible_map); - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) numa_set_node(i, 0); e820_register_active_regions(0, start_pfn, last_pfn); setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 51c0a2fc14fe..09737c8af074 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) if (!node_online(i)) setup_node_bootmem(i, nodes[i].start, nodes[i].end); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { int node = early_cpu_to_node(i); if (node == NUMA_NO_NODE) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1d88d2b39771..9e5752fe4d15 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include <linux/irq.h> #include <linux/dmi.h> #include <asm/numa.h> -#include "pci.h" +#include <asm/pci_x86.h> struct pci_root_info { char *name; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 22e057665e55..9bb09823b362 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -2,7 +2,7 @@ #include <linux/pci.h> #include <linux/topology.h> #include <linux/cpu.h> -#include "pci.h" +#include <asm/pci_x86.h> #ifdef CONFIG_X86_64 #include <asm/pci-direct.h> diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bb1a01f089e2..62ddb73e09ed 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -14,8 +14,7 @@ #include <asm/segment.h> #include <asm/io.h> #include <asm/smp.h> - -#include "pci.h" +#include <asm/pci_x86.h> unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9a5af6c8fbe9..bd13c3e4c6db 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/dmi.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 86631ccbc25a..f6adf2c6d751 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include <linux/pci.h> #include <asm/pci-direct.h> #include <asm/io.h> -#include "pci.h" +#include <asm/pci_x86.h> /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2051dc96b8e9..7d388d5cf548 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -6,8 +6,7 @@ #include <linux/dmi.h> #include <linux/pci.h> #include <linux/init.h> -#include "pci.h" - +#include <asm/pci_x86.h> static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 844df0cbbd3e..e51bf2cda4b0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include <asm/pat.h> #include <asm/e820.h> +#include <asm/pci_x86.h> -#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index d6c950f81858..bec3b048e72b 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include <linux/pci.h> #include <linux/init.h> -#include "pci.h" +#include <asm/pci_x86.h> /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bf69dbe08bff..373b9afe6d44 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -16,8 +16,7 @@ #include <asm/io_apic.h> #include <linux/irq.h> #include <linux/acpi.h> - -#include "pci.h" +#include <asm/pci_x86.h> #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index b722dd481b39..f1065b129e9c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include <linux/init.h> #include <linux/pci.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 654a2234f8f3..89bf9242c80a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,8 +15,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> #include <asm/e820.h> - -#include "pci.h" +#include <asm/pci_x86.h> /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f3c761dce695..8b2d561046a3 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <linux/acpi.h> #include <asm/e820.h> -#include "pci.h" +#include <asm/pci_x86.h> /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index a1994163c99d..30007ffc8e11 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -10,8 +10,7 @@ #include <linux/acpi.h> #include <linux/bitmap.h> #include <asm/e820.h> - -#include "pci.h" +#include <asm/pci_x86.h> /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1177845d3186..2089354968a2 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include <linux/nodemask.h> #include <mach_apic.h> #include <asm/mpspec.h> -#include "pci.h" +#include <asm/pci_x86.h> #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index e11e9e803d5f..b889d824f7c6 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include <linux/init.h> #include <asm/olpc.h> #include <asm/geode.h> -#include "pci.h" +#include <asm/pci_x86.h> /* * In the tables below, the first two line (8 longwords) are the diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 37472fc6f729..b82cae970dfd 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -6,9 +6,8 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/uaccess.h> -#include "pci.h" -#include "pci-functions.h" - +#include <asm/pci_x86.h> +#include <asm/mach-default/pci-functions.h> /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 42f4cb19faca..16d0c0eb0d19 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -9,11 +9,10 @@ #include <linux/init.h> #include <asm/setup.h> +#include <asm/pci_x86.h> #include <asm/visws/cobalt.h> #include <asm/visws/lithium.h> -#include "pci.h" - static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 773d68d3e912..503c240e26c7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1082,7 +1082,7 @@ static void drop_other_mm_ref(void *info) static void xen_drop_mm_ref(struct mm_struct *mm) { - cpumask_t mask; + cpumask_var_t mask; unsigned cpu; if (current->active_mm == mm) { @@ -1094,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm) } /* Get the "official" set of cpus referring to our pagetable. */ - mask = mm->cpu_vm_mask; + if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { + for_each_online_cpu(cpu) { + if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) + && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) + continue; + smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); + } + return; + } + cpumask_copy(mask, &mm->cpu_vm_mask); /* It's possible that a vcpu may have a stale reference to our cr3, because its in lazy mode, and it hasn't yet flushed @@ -1103,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) if needed. */ for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) - cpu_set(cpu, mask); + cpumask_set_cpu(cpu, mask); } - if (!cpus_empty(mask)) - smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); + if (!cpumask_empty(mask)) + smp_call_function_many(mask, drop_other_mm_ref, mm, 1); + free_cpumask_var(mask); } #else static void xen_drop_mm_ref(struct mm_struct *mm) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index acd9b6705e02..c44e2069c7c7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -33,7 +33,7 @@ #include "xen-ops.h" #include "mmu.h" -cpumask_t xen_cpu_initialized_map; +cpumask_var_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) { int i, rc; - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) { num_processors++; @@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) if (xen_smp_intr_init(0)) BUG(); - xen_cpu_initialized_map = cpumask_of_cpu(0); + if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) + panic("could not allocate xen_cpu_initialized_map\n"); + + cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); /* Restrict the possible_map according to max_cpus. */ while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { - for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) + for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) continue; cpu_clear(cpu, cpu_possible_map); } @@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) struct vcpu_guest_context *ctxt; struct desc_struct *gdt; - if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) + if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) return 0; ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); @@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu) xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); } -static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) +static void xen_send_IPI_mask(const struct cpumask *mask, + enum ipi_vector vector) { unsigned cpu; - cpus_and(mask, mask, cpu_online_map); - - for_each_cpu_mask_nr(cpu, mask) + for_each_cpu_and(cpu, mask, cpu_online_mask) xen_send_IPI_one(cpu, vector); } -static void xen_smp_send_call_function_ipi(cpumask_t mask) +static void xen_smp_send_call_function_ipi(const struct cpumask *mask) { int cpu; xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); /* Make sure other vcpus get a chance to run if they need to. */ - for_each_cpu_mask_nr(cpu, mask) { + for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { HYPERVISOR_sched_op(SCHEDOP_yield, 0); break; @@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) static void xen_smp_send_call_function_single_ipi(int cpu) { - xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); + xen_send_IPI_mask(cpumask_of(cpu), + XEN_CALL_FUNCTION_SINGLE_VECTOR); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 2a234db5949b..212ffe012b76 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled) pfn_to_mfn(xen_start_info->console.domU.mfn); } else { #ifdef CONFIG_SMP - xen_cpu_initialized_map = cpu_online_map; + BUG_ON(xen_cpu_initialized_map == NULL); + cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); #endif xen_vcpu_restore(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c9f7cda48ed7..14f240623497 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -132,8 +132,7 @@ static void do_stolen_accounting(void) *snap = state; /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. Passing NULL to - account_steal_time accounts the time as stolen. */ + including any left-overs from last time. */ stolen = runnable + offline + __get_cpu_var(residual_stolen); if (stolen < 0) @@ -141,11 +140,10 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; - account_steal_time(NULL, ticks); + account_steal_ticks(ticks); /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. Passing idle to - account_steal_time accounts the time as idle/wait. */ + including any left-overs from last time. */ blocked += __get_cpu_var(residual_blocked); if (blocked < 0) @@ -153,7 +151,7 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; - account_steal_time(idle_task(smp_processor_id()), ticks); + account_idle_ticks(ticks); } /* @@ -437,7 +435,7 @@ void xen_setup_timer(int cpu) evt = &per_cpu(xen_clock_events, cpu); memcpy(evt, xen_clockevent, sizeof(*evt)); - evt->cpumask = cpumask_of_cpu(cpu); + evt->cpumask = cpumask_of(cpu); evt->irq = irq; setup_runstate_info(cpu); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e1afae8461f..c1f8faf0a2c5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); __cpuinit void xen_init_lock_cpu(int cpu); void xen_uninit_lock_cpu(int cpu); -extern cpumask_t xen_cpu_initialized_map; +extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} #endif diff --git a/arch/xtensa/kernel/init_task.c b/arch/xtensa/kernel/init_task.c index 3df469dbe814..e07f5c9fcd35 100644 --- a/arch/xtensa/kernel/init_task.c +++ b/arch/xtensa/kernel/init_task.c @@ -21,7 +21,6 @@ #include <asm/uaccess.h> -static struct fs_struct init_fs = INIT_FS; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 64f5d54f7edc..4259072f5bd0 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); */ static ssize_t print_cpus_map(char *buf, cpumask_t *map) { - int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map); + int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); buf[n++] = '\n'; buf[n] = '\0'; diff --git a/drivers/base/node.c b/drivers/base/node.c index f5207090885a..91636cd8b6c9 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); len = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, *mask): - cpumask_scnprintf(buf, PAGE_SIZE-2, *mask); + cpulist_scnprintf(buf, PAGE_SIZE-2, mask) : + cpumask_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 199cd97e32e6..a8bc1cbcfa7c 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) if (len > 1) { n = type? - cpulist_scnprintf(buf, len-2, *mask): - cpumask_scnprintf(buf, len-2, *mask); + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index c602b547cc6e..1697043119bd 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -190,7 +190,7 @@ config DIGIEPCA config ESPSERIAL tristate "Hayes ESP serial port support" - depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API + depends on SERIAL_NONSTANDARD && ISA && ISA_DMA_API && BROKEN help This is a driver which supports Hayes ESP serial ports. Both single port cards and multiport cards are supported. Make sure to read @@ -443,6 +443,17 @@ config UNIX98_PTYS All modern Linux systems use the Unix98 ptys. Say Y unless you're on an embedded system and want to conserve memory. +config DEVPTS_MULTIPLE_INSTANCES + bool "Support multiple instances of devpts" + depends on UNIX98_PTYS + default n + ---help--- + Enable support for multiple instances of devpts filesystem. + If you want to have isolated PTY namespaces (eg: in containers), + say Y here. Otherwise, say N. If enabled, each mount of devpts + filesystem with the '-o newinstance' option will create an + independent PTY namespace. + config LEGACY_PTYS bool "Legacy (BSD) PTY support" default y diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index b97aebd7aeb8..4e0cfdeab146 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -170,7 +170,7 @@ static __inline__ void rtsdtr_ctrl(int bits) */ static void rs_stop(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_stop")) @@ -190,7 +190,7 @@ static void rs_stop(struct tty_struct *tty) static void rs_start(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_start")) @@ -861,7 +861,7 @@ static int rs_put_char(struct tty_struct *tty, unsigned char ch) static void rs_flush_chars(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_flush_chars")) @@ -934,7 +934,7 @@ static int rs_write(struct tty_struct * tty, const unsigned char *buf, int count static int rs_write_room(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; if (serial_paranoia_check(info, tty->name, "rs_write_room")) return 0; @@ -943,7 +943,7 @@ static int rs_write_room(struct tty_struct *tty) static int rs_chars_in_buffer(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; if (serial_paranoia_check(info, tty->name, "rs_chars_in_buffer")) return 0; @@ -952,7 +952,7 @@ static int rs_chars_in_buffer(struct tty_struct *tty) static void rs_flush_buffer(struct tty_struct *tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_flush_buffer")) @@ -969,7 +969,7 @@ static void rs_flush_buffer(struct tty_struct *tty) */ static void rs_send_xchar(struct tty_struct *tty, char ch) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_send_char")) @@ -1004,7 +1004,7 @@ static void rs_send_xchar(struct tty_struct *tty, char ch) */ static void rs_throttle(struct tty_struct * tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_THROTTLE char buf[64]; @@ -1029,7 +1029,7 @@ static void rs_throttle(struct tty_struct * tty) static void rs_unthrottle(struct tty_struct * tty) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_THROTTLE char buf[64]; @@ -1194,7 +1194,7 @@ static int get_lsr_info(struct async_struct * info, unsigned int __user *value) static int rs_tiocmget(struct tty_struct *tty, struct file *file) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; unsigned char control, status; unsigned long flags; @@ -1217,7 +1217,7 @@ static int rs_tiocmget(struct tty_struct *tty, struct file *file) static int rs_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_ioctl")) @@ -1244,7 +1244,7 @@ static int rs_tiocmset(struct tty_struct *tty, struct file *file, */ static int rs_break(struct tty_struct *tty, int break_state) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; unsigned long flags; if (serial_paranoia_check(info, tty->name, "rs_break")) @@ -1264,7 +1264,7 @@ static int rs_break(struct tty_struct *tty, int break_state) static int rs_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; struct async_icount cprev, cnow; /* kernel counter temps */ struct serial_icounter_struct icount; void __user *argp = (void __user *)arg; @@ -1368,7 +1368,7 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file, static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info = tty->driver_data; unsigned long flags; unsigned int cflag = tty->termios->c_cflag; @@ -1428,7 +1428,7 @@ static void rs_set_termios(struct tty_struct *tty, struct ktermios *old_termios) */ static void rs_close(struct tty_struct *tty, struct file * filp) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; struct serial_state *state; unsigned long flags; @@ -1523,7 +1523,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp) */ static void rs_wait_until_sent(struct tty_struct *tty, int timeout) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; unsigned long orig_jiffies, char_time; int lsr; @@ -1587,7 +1587,7 @@ static void rs_wait_until_sent(struct tty_struct *tty, int timeout) */ static void rs_hangup(struct tty_struct *tty) { - struct async_struct * info = (struct async_struct *)tty->driver_data; + struct async_struct * info = tty->driver_data; struct serial_state *state = info->state; if (serial_paranoia_check(info, tty->name, "rs_hangup")) diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 5e5b1dc1a0a7..6a59f72a9c21 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -5010,7 +5010,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, if (nchan == 0) { dev_err(&pdev->dev, "Cyclom-Y PCI host card with no " "Serial-Modules\n"); - return -EIO; + goto err_unmap; } } else if (device_id == PCI_DEVICE_ID_CYCLOM_Z_Hi) { struct RUNTIME_9060 __iomem *ctl_addr; diff --git a/drivers/char/epca.c b/drivers/char/epca.c index cf2461d34e5f..39ad820b2350 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -69,7 +69,9 @@ static int invalid_lilo_config; /* * The ISA boards do window flipping into the same spaces so its only sane with - * a single lock. It's still pretty efficient. + * a single lock. It's still pretty efficient. This lock guards the hardware + * and the tty_port lock guards the kernel side stuff like use counts. Take + * this lock inside the port lock if you must take both. */ static DEFINE_SPINLOCK(epca_lock); @@ -156,14 +158,12 @@ static struct channel *verifyChannel(struct tty_struct *); static void pc_sched_event(struct channel *, int); static void epca_error(int, char *); static void pc_close(struct tty_struct *, struct file *); -static void shutdown(struct channel *); +static void shutdown(struct channel *, struct tty_struct *tty); static void pc_hangup(struct tty_struct *); static int pc_write_room(struct tty_struct *); static int pc_chars_in_buffer(struct tty_struct *); static void pc_flush_buffer(struct tty_struct *); static void pc_flush_chars(struct tty_struct *); -static int block_til_ready(struct tty_struct *, struct file *, - struct channel *); static int pc_open(struct tty_struct *, struct file *); static void post_fep_init(unsigned int crd); static void epcapoll(unsigned long); @@ -173,7 +173,7 @@ static unsigned termios2digi_h(struct channel *ch, unsigned); static unsigned termios2digi_i(struct channel *ch, unsigned); static unsigned termios2digi_c(struct channel *ch, unsigned); static void epcaparam(struct tty_struct *, struct channel *); -static void receive_data(struct channel *); +static void receive_data(struct channel *, struct tty_struct *tty); static int pc_ioctl(struct tty_struct *, struct file *, unsigned int, unsigned long); static int info_ioctl(struct tty_struct *, struct file *, @@ -392,7 +392,7 @@ static struct channel *verifyChannel(struct tty_struct *tty) * through tty->driver_data this should catch it. */ if (tty) { - struct channel *ch = (struct channel *)tty->driver_data; + struct channel *ch = tty->driver_data; if (ch >= &digi_channels[0] && ch < &digi_channels[nbdevs]) { if (ch->magic == EPCA_MAGIC) return ch; @@ -419,76 +419,34 @@ static void epca_error(int line, char *msg) static void pc_close(struct tty_struct *tty, struct file *filp) { struct channel *ch; - unsigned long flags; + struct tty_port *port; /* * verifyChannel returns the channel from the tty struct if it is * valid. This serves as a sanity check. */ ch = verifyChannel(tty); - if (ch != NULL) { - spin_lock_irqsave(&epca_lock, flags); - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&epca_lock, flags); - return; - } - if (ch->port.count-- > 1) { - /* Begin channel is open more than once */ - /* - * Return without doing anything. Someone might still - * be using the channel. - */ - spin_unlock_irqrestore(&epca_lock, flags); - return; - } - /* Port open only once go ahead with shutdown & reset */ - BUG_ON(ch->port.count < 0); - - /* - * Let the rest of the driver know the channel is being closed. - * This becomes important if an open is attempted before close - * is finished. - */ - ch->port.flags |= ASYNC_CLOSING; - tty->closing = 1; - - spin_unlock_irqrestore(&epca_lock, flags); - - if (ch->port.flags & ASYNC_INITIALIZED) { - /* Setup an event to indicate when the - transmit buffer empties */ - setup_empty_event(tty, ch); - /* 30 seconds timeout */ - tty_wait_until_sent(tty, 3000); - } - pc_flush_buffer(tty); + if (ch == NULL) + return; + port = &ch->port; - tty_ldisc_flush(tty); - shutdown(ch); + if (tty_port_close_start(port, tty, filp) == 0) + return; - spin_lock_irqsave(&epca_lock, flags); - tty->closing = 0; - ch->event = 0; - ch->port.tty = NULL; - spin_unlock_irqrestore(&epca_lock, flags); + pc_flush_buffer(tty); + shutdown(ch, tty); - if (ch->port.blocked_open) { - if (ch->close_delay) - msleep_interruptible(jiffies_to_msecs(ch->close_delay)); - wake_up_interruptible(&ch->port.open_wait); - } - ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED | - ASYNC_CLOSING); - wake_up_interruptible(&ch->port.close_wait); - } + tty_port_close_end(port, tty); + ch->event = 0; /* FIXME: review ch->event locking */ + tty_port_tty_set(port, NULL); } -static void shutdown(struct channel *ch) +static void shutdown(struct channel *ch, struct tty_struct *tty) { unsigned long flags; - struct tty_struct *tty; struct board_chan __iomem *bc; + struct tty_port *port = &ch->port; - if (!(ch->port.flags & ASYNC_INITIALIZED)) + if (!(port->flags & ASYNC_INITIALIZED)) return; spin_lock_irqsave(&epca_lock, flags); @@ -503,7 +461,6 @@ static void shutdown(struct channel *ch) */ if (bc) writeb(0, &bc->idata); - tty = ch->port.tty; /* If we're a modem control device and HUPCL is on, drop RTS & DTR. */ if (tty->termios->c_cflag & HUPCL) { @@ -517,32 +474,26 @@ static void shutdown(struct channel *ch) * will have to reinitialized. Set a flag to indicate this. */ /* Prevent future Digi programmed interrupts from coming active */ - ch->port.flags &= ~ASYNC_INITIALIZED; + port->flags &= ~ASYNC_INITIALIZED; spin_unlock_irqrestore(&epca_lock, flags); } static void pc_hangup(struct tty_struct *tty) { struct channel *ch; + /* * verifyChannel returns the channel from the tty struct if it is * valid. This serves as a sanity check. */ ch = verifyChannel(tty); if (ch != NULL) { - unsigned long flags; - pc_flush_buffer(tty); tty_ldisc_flush(tty); - shutdown(ch); + shutdown(ch, tty); - spin_lock_irqsave(&epca_lock, flags); - ch->port.tty = NULL; - ch->event = 0; - ch->port.count = 0; - ch->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_INITIALIZED); - spin_unlock_irqrestore(&epca_lock, flags); - wake_up_interruptible(&ch->port.open_wait); + ch->event = 0; /* FIXME: review locking of ch->event */ + tty_port_hangup(&ch->port); } } @@ -786,100 +737,22 @@ static void pc_flush_chars(struct tty_struct *tty) } } -static int block_til_ready(struct tty_struct *tty, - struct file *filp, struct channel *ch) +static int epca_carrier_raised(struct tty_port *port) { - DECLARE_WAITQUEUE(wait, current); - int retval, do_clocal = 0; - unsigned long flags; - - if (tty_hung_up_p(filp)) { - if (ch->port.flags & ASYNC_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - return retval; - } - - /* - * If the device is in the middle of being closed, then block until - * it's done, and then try again. - */ - if (ch->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&ch->port.close_wait); - - if (ch->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - else - return -ERESTARTSYS; - } - - if (filp->f_flags & O_NONBLOCK) { - /* - * If non-blocking mode is set, then make the check up front - * and then exit. - */ - ch->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; - } - if (tty->termios->c_cflag & CLOCAL) - do_clocal = 1; - /* Block waiting for the carrier detect and the line to become free */ - - retval = 0; - add_wait_queue(&ch->port.open_wait, &wait); - - spin_lock_irqsave(&epca_lock, flags); - /* We dec count so that pc_close will know when to free things */ - if (!tty_hung_up_p(filp)) - ch->port.count--; - ch->port.blocked_open++; - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || - !(ch->port.flags & ASYNC_INITIALIZED)) { - if (ch->port.flags & ASYNC_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - break; - } - if (!(ch->port.flags & ASYNC_CLOSING) && - (do_clocal || (ch->imodem & ch->dcd))) - break; - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - spin_unlock_irqrestore(&epca_lock, flags); - /* - * Allow someone else to be scheduled. We will occasionally go - * through this loop until one of the above conditions change. - * The below schedule call will allow other processes to enter - * and prevent this loop from hogging the cpu. - */ - schedule(); - spin_lock_irqsave(&epca_lock, flags); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&ch->port.open_wait, &wait); - if (!tty_hung_up_p(filp)) - ch->port.count++; - ch->port.blocked_open--; - - spin_unlock_irqrestore(&epca_lock, flags); - - if (retval) - return retval; - - ch->port.flags |= ASYNC_NORMAL_ACTIVE; + struct channel *ch = container_of(port, struct channel, port); + if (ch->imodem & ch->dcd) + return 1; return 0; } +static void epca_raise_dtr_rts(struct tty_port *port) +{ +} + static int pc_open(struct tty_struct *tty, struct file *filp) { struct channel *ch; + struct tty_port *port; unsigned long flags; int line, retval, boardnum; struct board_chan __iomem *bc; @@ -890,6 +763,7 @@ static int pc_open(struct tty_struct *tty, struct file *filp) return -ENODEV; ch = &digi_channels[line]; + port = &ch->port; boardnum = ch->boardnum; /* Check status of board configured in system. */ @@ -926,22 +800,24 @@ static int pc_open(struct tty_struct *tty, struct file *filp) return -ENODEV; } - spin_lock_irqsave(&epca_lock, flags); + spin_lock_irqsave(&port->lock, flags); /* * Every time a channel is opened, increment a counter. This is * necessary because we do not wish to flush and shutdown the channel * until the last app holding the channel open, closes it. */ - ch->port.count++; + port->count++; /* * Set a kernel structures pointer to our local channel structure. This * way we can get to it when passed only a tty struct. */ tty->driver_data = ch; + port->tty = tty; /* * If this is the first time the channel has been opened, initialize * the tty->termios struct otherwise let pc_close handle it. */ + spin_lock(&epca_lock); globalwinon(ch); ch->statusflags = 0; @@ -956,31 +832,33 @@ static int pc_open(struct tty_struct *tty, struct file *filp) writew(head, &bc->rout); /* Set the channels associated tty structure */ - ch->port.tty = tty; /* * The below routine generally sets up parity, baud, flow control * issues, etc.... It effect both control flags and input flags. */ epcaparam(tty, ch); - ch->port.flags |= ASYNC_INITIALIZED; memoff(ch); - spin_unlock_irqrestore(&epca_lock, flags); + spin_unlock(&epca_lock); + port->flags |= ASYNC_INITIALIZED; + spin_unlock_irqrestore(&port->lock, flags); - retval = block_til_ready(tty, filp, ch); + retval = tty_port_block_til_ready(port, tty, filp); if (retval) return retval; /* * Set this again in case a hangup set it to zero while this open() was * waiting for the line... */ - spin_lock_irqsave(&epca_lock, flags); - ch->port.tty = tty; + spin_lock_irqsave(&port->lock, flags); + port->tty = tty; + spin_lock(&epca_lock); globalwinon(ch); /* Enable Digi Data events */ writeb(1, &bc->idata); memoff(ch); - spin_unlock_irqrestore(&epca_lock, flags); + spin_unlock(&epca_lock); + spin_unlock_irqrestore(&port->lock, flags); return 0; } @@ -1016,8 +894,11 @@ static void __exit epca_module_exit(void) } ch = card_ptr[crd]; for (count = 0; count < bd->numports; count++, ch++) { - if (ch && ch->port.tty) - tty_hangup(ch->port.tty); + struct tty_struct *tty = tty_port_tty_get(&ch->port); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } } } pci_unregister_driver(&epca_driver); @@ -1042,6 +923,11 @@ static const struct tty_operations pc_ops = { .break_ctl = pc_send_break }; +static const struct tty_port_operations epca_port_ops = { + .carrier_raised = epca_carrier_raised, + .raise_dtr_rts = epca_raise_dtr_rts, +}; + static int info_open(struct tty_struct *tty, struct file *filp) { return 0; @@ -1377,6 +1263,7 @@ static void post_fep_init(unsigned int crd) u16 tseg, rseg; tty_port_init(&ch->port); + ch->port.ops = &epca_port_ops; ch->brdchan = bc; ch->mailbox = gd; INIT_WORK(&ch->tqueue, do_softint); @@ -1428,7 +1315,7 @@ static void post_fep_init(unsigned int crd) ch->boardnum = crd; ch->channelnum = i; ch->magic = EPCA_MAGIC; - ch->port.tty = NULL; + tty_port_tty_set(&ch->port, NULL); if (shrinkmem) { fepcmd(ch, SETBUFFER, 32, 0, 0, 0); @@ -1510,7 +1397,7 @@ static void post_fep_init(unsigned int crd) ch->fepstartca = 0; ch->fepstopca = 0; - ch->close_delay = 50; + ch->port.close_delay = 50; spin_unlock_irqrestore(&epca_lock, flags); } @@ -1622,15 +1509,16 @@ static void doevent(int crd) if (bc == NULL) goto next; + tty = tty_port_tty_get(&ch->port); if (event & DATA_IND) { /* Begin DATA_IND */ - receive_data(ch); + receive_data(ch, tty); assertgwinon(ch); } /* End DATA_IND */ /* else *//* Fix for DCD transition missed bug */ if (event & MODEMCHG_IND) { /* A modem signal change has been indicated */ ch->imodem = mstat; - if (ch->port.flags & ASYNC_CHECK_CD) { + if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) { /* We are now receiving dcd */ if (mstat & ch->dcd) wake_up_interruptible(&ch->port.open_wait); @@ -1638,7 +1526,6 @@ static void doevent(int crd) pc_sched_event(ch, EPCA_EVENT_HANGUP); } } - tty = ch->port.tty; if (tty) { if (event & BREAK_IND) { /* A break has been indicated */ @@ -1658,6 +1545,7 @@ static void doevent(int crd) tty_wakeup(tty); } } + tty_kref_put(tty); } next: globalwinon(ch); @@ -1877,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch) * that the driver will wait on carrier detect. */ if (ts->c_cflag & CLOCAL) - ch->port.flags &= ~ASYNC_CHECK_CD; + clear_bit(ASYNC_CHECK_CD, &ch->port.flags); else - ch->port.flags |= ASYNC_CHECK_CD; + set_bit(ASYNC_CHECK_CD, &ch->port.flags); mval = ch->m_dtr | ch->m_rts; } /* End CBAUD not detected */ iflag = termios2digi_i(ch, ts->c_iflag); @@ -1952,11 +1840,10 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch) } /* Caller holds lock */ -static void receive_data(struct channel *ch) +static void receive_data(struct channel *ch, struct tty_struct *tty) { unchar *rptr; struct ktermios *ts = NULL; - struct tty_struct *tty; struct board_chan __iomem *bc; int dataToRead, wrapgap, bytesAvailable; unsigned int tail, head; @@ -1969,7 +1856,6 @@ static void receive_data(struct channel *ch) globalwinon(ch); if (ch->statusflags & RXSTOPPED) return; - tty = ch->port.tty; if (tty) ts = tty->termios; bc = ch->brdchan; @@ -2029,7 +1915,7 @@ static void receive_data(struct channel *ch) globalwinon(ch); writew(tail, &bc->rout); /* Must be called with global data */ - tty_schedule_flip(ch->port.tty); + tty_schedule_flip(tty); } static int info_ioctl(struct tty_struct *tty, struct file *file, @@ -2097,7 +1983,7 @@ static int info_ioctl(struct tty_struct *tty, struct file *file, static int pc_tiocmget(struct tty_struct *tty, struct file *file) { - struct channel *ch = (struct channel *) tty->driver_data; + struct channel *ch = tty->driver_data; struct board_chan __iomem *bc; unsigned int mstat, mflag = 0; unsigned long flags; @@ -2131,7 +2017,7 @@ static int pc_tiocmget(struct tty_struct *tty, struct file *file) static int pc_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct channel *ch = (struct channel *) tty->driver_data; + struct channel *ch = tty->driver_data; unsigned long flags; if (!ch) @@ -2178,7 +2064,7 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file, unsigned int mflag, mstat; unsigned char startc, stopc; struct board_chan __iomem *bc; - struct channel *ch = (struct channel *) tty->driver_data; + struct channel *ch = tty->driver_data; void __user *argp = (void __user *)arg; if (ch) @@ -2352,15 +2238,16 @@ static void do_softint(struct work_struct *work) struct channel *ch = container_of(work, struct channel, tqueue); /* Called in response to a modem change event */ if (ch && ch->magic == EPCA_MAGIC) { - struct tty_struct *tty = ch->port.tty; + struct tty_struct *tty = tty_port_tty_get(&ch->port);; if (tty && tty->driver_data) { if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) { tty_hangup(tty); wake_up_interruptible(&ch->port.open_wait); - ch->port.flags &= ~ASYNC_NORMAL_ACTIVE; + clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags); } } + tty_kref_put(tty); } } @@ -2473,7 +2360,7 @@ static void pc_unthrottle(struct tty_struct *tty) static int pc_send_break(struct tty_struct *tty, int msec) { - struct channel *ch = (struct channel *) tty->driver_data; + struct channel *ch = tty->driver_data; unsigned long flags; if (msec == -1) diff --git a/drivers/char/esp.c b/drivers/char/esp.c index 7f077c0097f6..45ec263ec012 100644 --- a/drivers/char/esp.c +++ b/drivers/char/esp.c @@ -2054,6 +2054,15 @@ static void esp_hangup(struct tty_struct *tty) wake_up_interruptible(&info->port.open_wait); } +static int esp_carrier_raised(struct tty_port *port) +{ + struct esp_struct *info = container_of(port, struct esp_struct, port); + serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT); + if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD) + return 1; + return 0; +} + /* * ------------------------------------------------------------ * esp_open() and friends @@ -2066,17 +2075,19 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, int retval; int do_clocal = 0; unsigned long flags; + int cd; + struct tty_port *port = &info->port; /* * If the device is in the middle of being closed, then block * until it's done, and then try again. */ if (tty_hung_up_p(filp) || - (info->port.flags & ASYNC_CLOSING)) { - if (info->port.flags & ASYNC_CLOSING) - interruptible_sleep_on(&info->port.close_wait); + (port->flags & ASYNC_CLOSING)) { + if (port->flags & ASYNC_CLOSING) + interruptible_sleep_on(&port->close_wait); #ifdef SERIAL_DO_RESTART - if (info->port.flags & ASYNC_HUP_NOTIFY) + if (port->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; else return -ERESTARTSYS; @@ -2091,7 +2102,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, */ if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) { - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -2101,20 +2112,20 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, /* * Block waiting for the carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, info->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * rs_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&info->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); #ifdef SERIAL_DEBUG_OPEN printk(KERN_DEBUG "block_til_ready before block: ttys%d, count = %d\n", - info->line, info->port.count); + info->line, port->count); #endif spin_lock_irqsave(&info->lock, flags); if (!tty_hung_up_p(filp)) - info->port.count--; - info->port.blocked_open++; + port->count--; + port->blocked_open++; while (1) { if ((tty->termios->c_cflag & CBAUD)) { unsigned int scratch; @@ -2129,9 +2140,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, } set_current_state(TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || - !(info->port.flags & ASYNC_INITIALIZED)) { + !(port->flags & ASYNC_INITIALIZED)) { #ifdef SERIAL_DO_RESTART - if (info->port.flags & ASYNC_HUP_NOTIFY) + if (port->flags & ASYNC_HUP_NOTIFY) retval = -EAGAIN; else retval = -ERESTARTSYS; @@ -2141,11 +2152,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, break; } - serial_out(info, UART_ESI_CMD1, ESI_GET_UART_STAT); - if (serial_in(info, UART_ESI_STAT2) & UART_MSR_DCD) - do_clocal = 1; + cd = tty_port_carrier_raised(port); - if (!(info->port.flags & ASYNC_CLOSING) && + if (!(port->flags & ASYNC_CLOSING) && (do_clocal)) break; if (signal_pending(current)) { @@ -2154,25 +2163,25 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, } #ifdef SERIAL_DEBUG_OPEN printk(KERN_DEBUG "block_til_ready blocking: ttys%d, count = %d\n", - info->line, info->port.count); + info->line, port->count); #endif spin_unlock_irqrestore(&info->lock, flags); schedule(); spin_lock_irqsave(&info->lock, flags); } set_current_state(TASK_RUNNING); - remove_wait_queue(&info->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); if (!tty_hung_up_p(filp)) - info->port.count++; - info->port.blocked_open--; + port->count++; + port->blocked_open--; spin_unlock_irqrestore(&info->lock, flags); #ifdef SERIAL_DEBUG_OPEN printk(KERN_DEBUG "block_til_ready after blocking: ttys%d, count = %d\n", - info->line, info->port.count); + info->line, port->count); #endif if (retval) return retval; - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -2329,6 +2338,10 @@ static const struct tty_operations esp_ops = { .tiocmset = esp_tiocmset, }; +static const struct tty_port_operations esp_port_ops = { + .esp_carrier_raised, +}; + /* * The serial driver boot-time initialization code! */ @@ -2415,6 +2428,8 @@ static int __init espserial_init(void) offset = 0; do { + tty_port_init(&info->port); + info->port.ops = &esp_port_ops; info->io_port = esp[i] + offset; info->irq = irq[i]; info->line = (i * 8) + (offset / 8); @@ -2437,8 +2452,6 @@ static int __init espserial_init(void) info->config.flow_off = flow_off; info->config.pio_threshold = pio_threshold; info->next_port = ports; - init_waitqueue_head(&info->port.open_wait); - init_waitqueue_head(&info->port.close_wait); init_waitqueue_head(&info->delta_msr_wait); init_waitqueue_head(&info->break_wait); ports = info; diff --git a/drivers/char/generic_serial.c b/drivers/char/generic_serial.c index c6090f84a2e4..9e4e569dc00d 100644 --- a/drivers/char/generic_serial.c +++ b/drivers/char/generic_serial.c @@ -376,7 +376,8 @@ static void gs_shutdown_port (struct gs_port *port) void gs_hangup(struct tty_struct *tty) { - struct gs_port *port; + struct gs_port *port; + unsigned long flags; func_enter (); @@ -386,9 +387,11 @@ void gs_hangup(struct tty_struct *tty) return; gs_shutdown_port (port); + spin_lock_irqsave(&port->port.lock, flags); port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|GS_ACTIVE); port->port.tty = NULL; port->port.count = 0; + spin_unlock_irqrestore(&port->port.lock, flags); wake_up_interruptible(&port->port.open_wait); func_exit (); @@ -397,7 +400,8 @@ void gs_hangup(struct tty_struct *tty) int gs_block_til_ready(void *port_, struct file * filp) { - struct gs_port *port = port_; + struct gs_port *gp = port_; + struct tty_port *port = &gp->port; DECLARE_WAITQUEUE(wait, current); int retval; int do_clocal = 0; @@ -409,16 +413,16 @@ int gs_block_til_ready(void *port_, struct file * filp) if (!port) return 0; - tty = port->port.tty; + tty = port->tty; gs_dprintk (GS_DEBUG_BTR, "Entering gs_block_till_ready.\n"); /* * If the device is in the middle of being closed, then block * until it's done, and then try again. */ - if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&port->port.close_wait); - if (port->port.flags & ASYNC_HUP_NOTIFY) + if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { + interruptible_sleep_on(&port->close_wait); + if (port->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; else return -ERESTARTSYS; @@ -432,7 +436,7 @@ int gs_block_til_ready(void *port_, struct file * filp) */ if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) { - port->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -444,34 +448,34 @@ int gs_block_til_ready(void *port_, struct file * filp) /* * Block waiting for the carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, port->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * rs_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&port->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); gs_dprintk (GS_DEBUG_BTR, "after add waitq.\n"); - spin_lock_irqsave(&port->driver_lock, flags); + spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) { - port->port.count--; + port->count--; } - spin_unlock_irqrestore(&port->driver_lock, flags); - port->port.blocked_open++; + port->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); while (1) { - CD = port->rd->get_CD (port); + CD = tty_port_carrier_raised(port); gs_dprintk (GS_DEBUG_BTR, "CD is now %d.\n", CD); set_current_state (TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp) || - !(port->port.flags & ASYNC_INITIALIZED)) { - if (port->port.flags & ASYNC_HUP_NOTIFY) + !(port->flags & ASYNC_INITIALIZED)) { + if (port->flags & ASYNC_HUP_NOTIFY) retval = -EAGAIN; else retval = -ERESTARTSYS; break; } - if (!(port->port.flags & ASYNC_CLOSING) && + if (!(port->flags & ASYNC_CLOSING) && (do_clocal || CD)) break; gs_dprintk (GS_DEBUG_BTR, "signal_pending is now: %d (%lx)\n", @@ -483,19 +487,20 @@ int gs_block_til_ready(void *port_, struct file * filp) schedule(); } gs_dprintk (GS_DEBUG_BTR, "Got out of the loop. (%d)\n", - port->port.blocked_open); + port->blocked_open); set_current_state (TASK_RUNNING); - remove_wait_queue(&port->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); + + spin_lock_irqsave(&port->lock, flags); if (!tty_hung_up_p(filp)) { - port->port.count++; + port->count++; } - port->port.blocked_open--; - if (retval) - return retval; - - port->port.flags |= ASYNC_NORMAL_ACTIVE; + port->blocked_open--; + if (retval == 0) + port->flags |= ASYNC_NORMAL_ACTIVE; + spin_unlock_irqrestore(&port->lock, flags); func_exit (); - return 0; + return retval; } @@ -506,7 +511,7 @@ void gs_close(struct tty_struct * tty, struct file * filp) func_enter (); - port = (struct gs_port *) tty->driver_data; + port = tty->driver_data; if (!port) return; @@ -516,10 +521,10 @@ void gs_close(struct tty_struct * tty, struct file * filp) port->port.tty = tty; } - spin_lock_irqsave(&port->driver_lock, flags); + spin_lock_irqsave(&port->port.lock, flags); if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&port->driver_lock, flags); + spin_unlock_irqrestore(&port->port.lock, flags); if (port->rd->hungup) port->rd->hungup (port); func_exit (); @@ -538,7 +543,7 @@ void gs_close(struct tty_struct * tty, struct file * filp) if (port->port.count) { gs_dprintk(GS_DEBUG_CLOSE, "gs_close port %p: count: %d\n", port, port->port.count); - spin_unlock_irqrestore(&port->driver_lock, flags); + spin_unlock_irqrestore(&port->port.lock, flags); func_exit (); return; } @@ -559,8 +564,10 @@ void gs_close(struct tty_struct * tty, struct file * filp) * line status register. */ + spin_lock_irqsave(&port->driver_lock, flags); port->rd->disable_rx_interrupts (port); spin_unlock_irqrestore(&port->driver_lock, flags); + spin_unlock_irqrestore(&port->port.lock, flags); /* close has no way of returning "EINTR", so discard return value */ if (port->closing_wait != ASYNC_CLOSING_WAIT_NONE) @@ -573,20 +580,25 @@ void gs_close(struct tty_struct * tty, struct file * filp) tty_ldisc_flush(tty); tty->closing = 0; + spin_lock_irqsave(&port->driver_lock, flags); port->event = 0; port->rd->close (port); port->rd->shutdown_port (port); + spin_unlock_irqrestore(&port->driver_lock, flags); + + spin_lock_irqsave(&port->port.lock, flags); port->port.tty = NULL; if (port->port.blocked_open) { if (port->close_delay) { - spin_unlock_irqrestore(&port->driver_lock, flags); + spin_unlock_irqrestore(&port->port.lock, flags); msleep_interruptible(jiffies_to_msecs(port->close_delay)); - spin_lock_irqsave(&port->driver_lock, flags); + spin_lock_irqsave(&port->port.lock, flags); } wake_up_interruptible(&port->port.open_wait); } port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING | ASYNC_INITIALIZED); + spin_unlock_irqrestore(&port->port.lock, flags); wake_up_interruptible(&port->port.close_wait); func_exit (); diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 0587b66d6fc7..5a8a4c28c867 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -529,7 +529,7 @@ static void hvc_set_winsz(struct work_struct *work) tty = tty_kref_get(hp->tty); spin_unlock_irqrestore(&hp->lock, hvc_flags); - tty_do_resize(tty, tty, &ws); + tty_do_resize(tty, &ws); tty_kref_put(tty); } diff --git a/drivers/char/hvsi.c b/drivers/char/hvsi.c index af055287271a..406f8742a260 100644 --- a/drivers/char/hvsi.c +++ b/drivers/char/hvsi.c @@ -997,14 +997,14 @@ out: static int hvsi_write_room(struct tty_struct *tty) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; return N_OUTBUF - hp->n_outbuf; } static int hvsi_chars_in_buffer(struct tty_struct *tty) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; return hp->n_outbuf; } @@ -1070,7 +1070,7 @@ out: */ static void hvsi_throttle(struct tty_struct *tty) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; pr_debug("%s\n", __func__); @@ -1079,7 +1079,7 @@ static void hvsi_throttle(struct tty_struct *tty) static void hvsi_unthrottle(struct tty_struct *tty) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; unsigned long flags; int shouldflip = 0; @@ -1100,7 +1100,7 @@ static void hvsi_unthrottle(struct tty_struct *tty) static int hvsi_tiocmget(struct tty_struct *tty, struct file *file) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; hvsi_get_mctrl(hp); return hp->mctrl; @@ -1109,7 +1109,7 @@ static int hvsi_tiocmget(struct tty_struct *tty, struct file *file) static int hvsi_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct hvsi_struct *hp = (struct hvsi_struct *)tty->driver_data; + struct hvsi_struct *hp = tty->driver_data; unsigned long flags; uint16_t new_mctrl; diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index b60d425ce8d1..fc8cf7ac7f2b 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -485,7 +485,21 @@ static struct dmi_system_id __initdata i8k_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MP061"), }, }, - { } + { + .ident = "Dell Precision", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision"), + }, + }, + { + .ident = "Dell Vostro", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro"), + }, + }, + { } }; /* diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 04e4549299ba..24aa6e88e223 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -328,11 +328,13 @@ static inline void drop_rts(struct isi_port *port) } /* card->lock MUST NOT be held */ -static inline void raise_dtr_rts(struct isi_port *port) + +static void isicom_raise_dtr_rts(struct tty_port *port) { - struct isi_board *card = port->card; + struct isi_port *ip = container_of(port, struct isi_port, port); + struct isi_board *card = ip->card; unsigned long base = card->base; - u16 channel = port->channel; + u16 channel = ip->channel; if (!lock_card(card)) return; @@ -340,7 +342,7 @@ static inline void raise_dtr_rts(struct isi_port *port) outw(0x8000 | (channel << card->shift_count) | 0x02, base); outw(0x0f04, base); InterruptTheCard(base); - port->status |= (ISI_DTR | ISI_RTS); + ip->status |= (ISI_DTR | ISI_RTS); unlock_card(card); } @@ -830,80 +832,10 @@ static int isicom_setup_port(struct tty_struct *tty) return 0; } -static int block_til_ready(struct tty_struct *tty, struct file *filp, - struct isi_port *port) +static int isicom_carrier_raised(struct tty_port *port) { - struct isi_board *card = port->card; - int do_clocal = 0, retval; - unsigned long flags; - DECLARE_WAITQUEUE(wait, current); - - /* block if port is in the process of being closed */ - - if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) { - pr_dbg("block_til_ready: close in progress.\n"); - interruptible_sleep_on(&port->port.close_wait); - if (port->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - else - return -ERESTARTSYS; - } - - /* if non-blocking mode is set ... */ - - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { - pr_dbg("block_til_ready: non-block mode.\n"); - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; - } - - if (C_CLOCAL(tty)) - do_clocal = 1; - - /* block waiting for DCD to be asserted, and while - callout dev is busy */ - retval = 0; - add_wait_queue(&port->port.open_wait, &wait); - - spin_lock_irqsave(&card->card_lock, flags); - if (!tty_hung_up_p(filp)) - port->port.count--; - port->port.blocked_open++; - spin_unlock_irqrestore(&card->card_lock, flags); - - while (1) { - raise_dtr_rts(port); - - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) { - if (port->port.flags & ASYNC_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - break; - } - if (!(port->port.flags & ASYNC_CLOSING) && - (do_clocal || (port->status & ISI_DCD))) { - break; - } - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - schedule(); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&port->port.open_wait, &wait); - spin_lock_irqsave(&card->card_lock, flags); - if (!tty_hung_up_p(filp)) - port->port.count++; - port->port.blocked_open--; - spin_unlock_irqrestore(&card->card_lock, flags); - if (retval) - return retval; - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; + struct isi_port *ip = container_of(port, struct isi_port, port); + return (ip->status & ISI_DCD)?1 : 0; } static int isicom_open(struct tty_struct *tty, struct file *filp) @@ -932,12 +864,13 @@ static int isicom_open(struct tty_struct *tty, struct file *filp) isicom_setup_board(card); + /* FIXME: locking on port.count etc */ port->port.count++; tty->driver_data = port; tty_port_tty_set(&port->port, tty); error = isicom_setup_port(tty); if (error == 0) - error = block_til_ready(tty, filp, port); + error = tty_port_block_til_ready(&port->port, tty, filp); return error; } @@ -1012,76 +945,30 @@ static void isicom_flush_buffer(struct tty_struct *tty) static void isicom_close(struct tty_struct *tty, struct file *filp) { - struct isi_port *port = tty->driver_data; + struct isi_port *ip = tty->driver_data; + struct tty_port *port = &ip->port; struct isi_board *card; unsigned long flags; - if (!port) - return; - card = port->card; - if (isicom_paranoia_check(port, tty->name, "isicom_close")) - return; - - pr_dbg("Close start!!!.\n"); - - spin_lock_irqsave(&card->card_lock, flags); - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&card->card_lock, flags); - return; - } - - if (tty->count == 1 && port->port.count != 1) { - printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port " - "count tty->count = 1 port count = %d.\n", - card->base, port->port.count); - port->port.count = 1; - } - if (--port->port.count < 0) { - printk(KERN_WARNING "ISICOM:(0x%lx) isicom_close: bad port " - "count for channel%d = %d", card->base, port->channel, - port->port.count); - port->port.count = 0; - } + BUG_ON(!ip); - if (port->port.count) { - spin_unlock_irqrestore(&card->card_lock, flags); + card = ip->card; + if (isicom_paranoia_check(ip, tty->name, "isicom_close")) return; - } - port->port.flags |= ASYNC_CLOSING; - tty->closing = 1; - spin_unlock_irqrestore(&card->card_lock, flags); - if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, port->port.closing_wait); /* indicate to the card that no more data can be received on this port */ spin_lock_irqsave(&card->card_lock, flags); - if (port->port.flags & ASYNC_INITIALIZED) { - card->port_status &= ~(1 << port->channel); + if (port->flags & ASYNC_INITIALIZED) { + card->port_status &= ~(1 << ip->channel); outw(card->port_status, card->base + 0x02); } - isicom_shutdown_port(port); + isicom_shutdown_port(ip); spin_unlock_irqrestore(&card->card_lock, flags); isicom_flush_buffer(tty); - tty_ldisc_flush(tty); - - spin_lock_irqsave(&card->card_lock, flags); - tty->closing = 0; - - if (port->port.blocked_open) { - spin_unlock_irqrestore(&card->card_lock, flags); - if (port->port.close_delay) { - pr_dbg("scheduling until time out.\n"); - msleep_interruptible( - jiffies_to_msecs(port->port.close_delay)); - } - spin_lock_irqsave(&card->card_lock, flags); - wake_up_interruptible(&port->port.open_wait); - } - port->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING); - wake_up_interruptible(&port->port.close_wait); - spin_unlock_irqrestore(&card->card_lock, flags); + + tty_port_close_end(port, tty); } /* write et all */ @@ -1420,10 +1307,7 @@ static void isicom_hangup(struct tty_struct *tty) isicom_shutdown_port(port); spin_unlock_irqrestore(&port->card->card_lock, flags); - port->port.count = 0; - port->port.flags &= ~ASYNC_NORMAL_ACTIVE; - tty_port_tty_set(&port->port, NULL); - wake_up_interruptible(&port->port.open_wait); + tty_port_hangup(&port->port); } @@ -1452,6 +1336,11 @@ static const struct tty_operations isicom_ops = { .break_ctl = isicom_send_break, }; +static const struct tty_port_operations isicom_port_ops = { + .carrier_raised = isicom_carrier_raised, + .raise_dtr_rts = isicom_raise_dtr_rts, +}; + static int __devinit reset_card(struct pci_dev *pdev, const unsigned int card, unsigned int *signature) { @@ -1794,6 +1683,7 @@ static int __init isicom_init(void) spin_lock_init(&isi_card[idx].card_lock); for (channel = 0; channel < 16; channel++, port++) { tty_port_init(&port->port); + port->port.ops = &isicom_port_ops; port->magic = ISICOM_MAGIC; port->card = &isi_card[idx]; port->channel = channel; diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 4b10770fa937..5c3dc6b8411c 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -151,7 +151,7 @@ static char *stli_drvversion = "5.6.0"; static char *stli_serialname = "ttyE"; static struct tty_driver *stli_serial; - +static const struct tty_port_operations stli_port_ops; #define STLI_TXBUFSIZE 4096 @@ -626,8 +626,6 @@ static int stli_hostcmd(struct stlibrd *brdp, struct stliport *portp); static int stli_initopen(struct tty_struct *tty, struct stlibrd *brdp, struct stliport *portp); static int stli_rawopen(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait); static int stli_rawclose(struct stlibrd *brdp, struct stliport *portp, unsigned long arg, int wait); -static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp, - struct stliport *portp, struct file *filp); static int stli_setport(struct tty_struct *tty); static int stli_cmdwait(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback); static void stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigned long cmd, void *arg, int size, int copyback); @@ -769,7 +767,7 @@ static int stli_parsebrd(struct stlconf *confp, char **argp) break; } if (i == ARRAY_SIZE(stli_brdstr)) { - printk("STALLION: unknown board name, %s?\n", argp[0]); + printk(KERN_WARNING "istallion: unknown board name, %s?\n", argp[0]); return 0; } @@ -787,6 +785,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp) { struct stlibrd *brdp; struct stliport *portp; + struct tty_port *port; unsigned int minordev, brdnr, portnr; int rc; @@ -808,30 +807,19 @@ static int stli_open(struct tty_struct *tty, struct file *filp) return -ENODEV; if (portp->devnr < 1) return -ENODEV; - - -/* - * Check if this port is in the middle of closing. If so then wait - * until it is closed then return error status based on flag settings. - * The sleep here does not need interrupt protection since the wakeup - * for it is done with the same context. - */ - if (portp->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&portp->port.close_wait); - if (portp->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - return -ERESTARTSYS; - } + port = &portp->port; /* * On the first open of the device setup the port hardware, and * initialize the per port data structure. Since initializing the port * requires several commands to the board we will need to wait for any * other open that is already initializing the port. + * + * Review - locking */ - tty_port_tty_set(&portp->port, tty); + tty_port_tty_set(port, tty); tty->driver_data = portp; - portp->port.count++; + port->count++; wait_event_interruptible(portp->raw_wait, !test_bit(ST_INITIALIZING, &portp->state)); @@ -841,7 +829,8 @@ static int stli_open(struct tty_struct *tty, struct file *filp) if ((portp->port.flags & ASYNC_INITIALIZED) == 0) { set_bit(ST_INITIALIZING, &portp->state); if ((rc = stli_initopen(tty, brdp, portp)) >= 0) { - portp->port.flags |= ASYNC_INITIALIZED; + /* Locking */ + port->flags |= ASYNC_INITIALIZED; clear_bit(TTY_IO_ERROR, &tty->flags); } clear_bit(ST_INITIALIZING, &portp->state); @@ -849,31 +838,7 @@ static int stli_open(struct tty_struct *tty, struct file *filp) if (rc < 0) return rc; } - -/* - * Check if this port is in the middle of closing. If so then wait - * until it is closed then return error status, based on flag settings. - * The sleep here does not need interrupt protection since the wakeup - * for it is done with the same context. - */ - if (portp->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&portp->port.close_wait); - if (portp->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - return -ERESTARTSYS; - } - -/* - * Based on type of open being done check if it can overlap with any - * previous opens still in effect. If we are a normal serial device - * then also we might have to wait for carrier. - */ - if (!(filp->f_flags & O_NONBLOCK)) { - if ((rc = stli_waitcarrier(tty, brdp, portp, filp)) != 0) - return rc; - } - portp->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; + return tty_port_block_til_ready(&portp->port, tty, filp); } /*****************************************************************************/ @@ -882,25 +847,16 @@ static void stli_close(struct tty_struct *tty, struct file *filp) { struct stlibrd *brdp; struct stliport *portp; + struct tty_port *port; unsigned long flags; portp = tty->driver_data; if (portp == NULL) return; + port = &portp->port; - spin_lock_irqsave(&stli_lock, flags); - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&stli_lock, flags); - return; - } - if ((tty->count == 1) && (portp->port.count != 1)) - portp->port.count = 1; - if (portp->port.count-- > 1) { - spin_unlock_irqrestore(&stli_lock, flags); + if (tty_port_close_start(port, tty, filp) == 0) return; - } - - portp->port.flags |= ASYNC_CLOSING; /* * May want to wait for data to drain before closing. The BUSY flag @@ -908,15 +864,19 @@ static void stli_close(struct tty_struct *tty, struct file *filp) * updated by messages from the slave - indicating when all chars * really have drained. */ + spin_lock_irqsave(&stli_lock, flags); if (tty == stli_txcooktty) stli_flushchars(tty); - tty->closing = 1; spin_unlock_irqrestore(&stli_lock, flags); + /* We end up doing this twice for the moment. This needs looking at + eventually. Note we still use portp->closing_wait as a result */ if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, portp->closing_wait); - portp->port.flags &= ~ASYNC_INITIALIZED; + /* FIXME: port locking here needs attending to */ + port->flags &= ~ASYNC_INITIALIZED; + brdp = stli_brds[portp->brdnr]; stli_rawclose(brdp, portp, 0, 0); if (tty->termios->c_cflag & HUPCL) { @@ -934,17 +894,8 @@ static void stli_close(struct tty_struct *tty, struct file *filp) set_bit(ST_DOFLUSHRX, &portp->state); stli_flushbuffer(tty); - tty->closing = 0; - tty_port_tty_set(&portp->port, NULL); - - if (portp->openwaitcnt) { - if (portp->close_delay) - msleep_interruptible(jiffies_to_msecs(portp->close_delay)); - wake_up_interruptible(&portp->port.open_wait); - } - - portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&portp->port.close_wait); + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); } /*****************************************************************************/ @@ -1183,62 +1134,23 @@ static int stli_setport(struct tty_struct *tty) /*****************************************************************************/ -/* - * Possibly need to wait for carrier (DCD signal) to come high. Say - * maybe because if we are clocal then we don't need to wait... - */ - -static int stli_waitcarrier(struct tty_struct *tty, struct stlibrd *brdp, - struct stliport *portp, struct file *filp) +static int stli_carrier_raised(struct tty_port *port) { - unsigned long flags; - int rc, doclocal; - - rc = 0; - doclocal = 0; - - if (tty->termios->c_cflag & CLOCAL) - doclocal++; - - spin_lock_irqsave(&stli_lock, flags); - portp->openwaitcnt++; - if (! tty_hung_up_p(filp)) - portp->port.count--; - spin_unlock_irqrestore(&stli_lock, flags); - - for (;;) { - stli_mkasysigs(&portp->asig, 1, 1); - if ((rc = stli_cmdwait(brdp, portp, A_SETSIGNALS, - &portp->asig, sizeof(asysigs_t), 0)) < 0) - break; - if (tty_hung_up_p(filp) || - ((portp->port.flags & ASYNC_INITIALIZED) == 0)) { - if (portp->port.flags & ASYNC_HUP_NOTIFY) - rc = -EBUSY; - else - rc = -ERESTARTSYS; - break; - } - if (((portp->port.flags & ASYNC_CLOSING) == 0) && - (doclocal || (portp->sigs & TIOCM_CD))) { - break; - } - if (signal_pending(current)) { - rc = -ERESTARTSYS; - break; - } - interruptible_sleep_on(&portp->port.open_wait); - } - - spin_lock_irqsave(&stli_lock, flags); - if (! tty_hung_up_p(filp)) - portp->port.count++; - portp->openwaitcnt--; - spin_unlock_irqrestore(&stli_lock, flags); + struct stliport *portp = container_of(port, struct stliport, port); + return (portp->sigs & TIOCM_CD) ? 1 : 0; +} - return rc; +static void stli_raise_dtr_rts(struct tty_port *port) +{ + struct stliport *portp = container_of(port, struct stliport, port); + struct stlibrd *brdp = stli_brds[portp->brdnr]; + stli_mkasysigs(&portp->asig, 1, 1); + if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig, + sizeof(asysigs_t), 0) < 0) + printk(KERN_WARNING "istallion: dtr raise failed.\n"); } + /*****************************************************************************/ /* @@ -1550,7 +1462,7 @@ static int stli_getserial(struct stliport *portp, struct serial_struct __user *s sio.irq = 0; sio.flags = portp->port.flags; sio.baud_base = portp->baud_base; - sio.close_delay = portp->close_delay; + sio.close_delay = portp->port.close_delay; sio.closing_wait = portp->closing_wait; sio.custom_divisor = portp->custom_divisor; sio.xmit_fifo_size = 0; @@ -1582,7 +1494,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s return -EFAULT; if (!capable(CAP_SYS_ADMIN)) { if ((sio.baud_base != portp->baud_base) || - (sio.close_delay != portp->close_delay) || + (sio.close_delay != portp->port.close_delay) || ((sio.flags & ~ASYNC_USR_MASK) != (portp->port.flags & ~ASYNC_USR_MASK))) return -EPERM; @@ -1591,7 +1503,7 @@ static int stli_setserial(struct tty_struct *tty, struct serial_struct __user *s portp->port.flags = (portp->port.flags & ~ASYNC_USR_MASK) | (sio.flags & ASYNC_USR_MASK); portp->baud_base = sio.baud_base; - portp->close_delay = sio.close_delay; + portp->port.close_delay = sio.close_delay; portp->closing_wait = sio.closing_wait; portp->custom_divisor = sio.custom_divisor; @@ -1821,6 +1733,7 @@ static void stli_hangup(struct tty_struct *tty) { struct stliport *portp; struct stlibrd *brdp; + struct tty_port *port; unsigned long flags; portp = tty->driver_data; @@ -1831,8 +1744,11 @@ static void stli_hangup(struct tty_struct *tty) brdp = stli_brds[portp->brdnr]; if (brdp == NULL) return; + port = &portp->port; - portp->port.flags &= ~ASYNC_INITIALIZED; + spin_lock_irqsave(&port->lock, flags); + port->flags &= ~ASYNC_INITIALIZED; + spin_unlock_irqrestore(&port->lock, flags); if (!test_bit(ST_CLOSING, &portp->state)) stli_rawclose(brdp, portp, 0, 0); @@ -1853,12 +1769,9 @@ static void stli_hangup(struct tty_struct *tty) clear_bit(ST_TXBUSY, &portp->state); clear_bit(ST_RXSTOP, &portp->state); set_bit(TTY_IO_ERROR, &tty->flags); - tty_port_tty_set(&portp->port, NULL); - portp->port.flags &= ~ASYNC_NORMAL_ACTIVE; - portp->port.count = 0; spin_unlock_irqrestore(&stli_lock, flags); - wake_up_interruptible(&portp->port.open_wait); + tty_port_hangup(port); } /*****************************************************************************/ @@ -2132,7 +2045,7 @@ static void __stli_sendcmd(struct stlibrd *brdp, struct stliport *portp, unsigne unsigned char __iomem *bits; if (test_bit(ST_CMDING, &portp->state)) { - printk(KERN_ERR "STALLION: command already busy, cmd=%x!\n", + printk(KERN_ERR "istallion: command already busy, cmd=%x!\n", (int) cmd); return; } @@ -2692,16 +2605,17 @@ static int stli_initports(struct stlibrd *brdp) for (i = 0, panelnr = 0, panelport = 0; (i < brdp->nrports); i++) { portp = kzalloc(sizeof(struct stliport), GFP_KERNEL); if (!portp) { - printk("STALLION: failed to allocate port structure\n"); + printk(KERN_WARNING "istallion: failed to allocate port structure\n"); continue; } tty_port_init(&portp->port); + portp->port.ops = &stli_port_ops; portp->magic = STLI_PORTMAGIC; portp->portnr = i; portp->brdnr = brdp->brdnr; portp->panelnr = panelnr; portp->baud_base = STL_BAUDBASE; - portp->close_delay = STL_CLOSEDELAY; + portp->port.close_delay = STL_CLOSEDELAY; portp->closing_wait = 30 * HZ; init_waitqueue_head(&portp->port.open_wait); init_waitqueue_head(&portp->port.close_wait); @@ -2758,7 +2672,7 @@ static void __iomem *stli_ecpgetmemptr(struct stlibrd *brdp, unsigned long offse unsigned char val; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), brd=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -2832,7 +2746,7 @@ static void __iomem *stli_ecpeigetmemptr(struct stlibrd *brdp, unsigned long off unsigned char val; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), brd=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -2884,7 +2798,7 @@ static void __iomem *stli_ecpmcgetmemptr(struct stlibrd *brdp, unsigned long off unsigned char val; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), brd=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -2929,7 +2843,7 @@ static void __iomem *stli_ecppcigetmemptr(struct stlibrd *brdp, unsigned long of unsigned char val; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), board=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -2994,7 +2908,7 @@ static void __iomem *stli_onbgetmemptr(struct stlibrd *brdp, unsigned long offse void __iomem *ptr; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), brd=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -3060,7 +2974,7 @@ static void __iomem *stli_onbegetmemptr(struct stlibrd *brdp, unsigned long offs unsigned char val; if (offset > brdp->memsize) { - printk(KERN_ERR "STALLION: shared memory pointer=%x out of " + printk(KERN_ERR "istallion: shared memory pointer=%x out of " "range at line=%d(%d), brd=%d\n", (int) offset, line, __LINE__, brdp->brdnr); ptr = NULL; @@ -3499,7 +3413,7 @@ static int stli_startbrd(struct stlibrd *brdp) #endif if (nrdevs < (brdp->nrports + 1)) { - printk(KERN_ERR "STALLION: slave failed to allocate memory for " + printk(KERN_ERR "istallion: slave failed to allocate memory for " "all devices, devices=%d\n", nrdevs); brdp->nrports = nrdevs - 1; } @@ -3509,13 +3423,13 @@ static int stli_startbrd(struct stlibrd *brdp) brdp->bitsize = (nrdevs + 7) / 8; memoff = readl(&hdrp->memp); if (memoff > brdp->memsize) { - printk(KERN_ERR "STALLION: corrupted shared memory region?\n"); + printk(KERN_ERR "istallion: corrupted shared memory region?\n"); rc = -EIO; goto stli_donestartup; } memp = (cdkmem_t __iomem *) EBRDGETMEMPTR(brdp, memoff); if (readw(&memp->dtype) != TYP_ASYNCTRL) { - printk(KERN_ERR "STALLION: no slave control device found\n"); + printk(KERN_ERR "istallion: no slave control device found\n"); goto stli_donestartup; } memp++; @@ -3600,7 +3514,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp) retval = stli_initonb(brdp); break; default: - printk(KERN_ERR "STALLION: board=%d is unknown board " + printk(KERN_ERR "istallion: board=%d is unknown board " "type=%d\n", brdp->brdnr, brdp->brdtype); retval = -ENODEV; } @@ -3609,7 +3523,7 @@ static int __devinit stli_brdinit(struct stlibrd *brdp) return retval; stli_initports(brdp); - printk(KERN_INFO "STALLION: %s found, board=%d io=%x mem=%x " + printk(KERN_INFO "istallion: %s found, board=%d io=%x mem=%x " "nrpanels=%d nrports=%d\n", stli_brdnames[brdp->brdtype], brdp->brdnr, brdp->iobase, (int) brdp->memaddr, brdp->nrpanels, brdp->nrports); @@ -3703,7 +3617,7 @@ static int stli_eisamemprobe(struct stlibrd *brdp) if (! foundit) { brdp->memaddr = 0; brdp->membase = NULL; - printk(KERN_ERR "STALLION: failed to probe shared memory " + printk(KERN_ERR "istallion: failed to probe shared memory " "region for %s in EISA slot=%d\n", stli_brdnames[brdp->brdtype], (brdp->iobase >> 12)); return -ENODEV; @@ -3848,7 +3762,7 @@ static int __devinit stli_pciprobe(struct pci_dev *pdev, mutex_lock(&stli_brdslock); brdnr = stli_getbrdnr(); if (brdnr < 0) { - printk(KERN_INFO "STALLION: too many boards found, " + printk(KERN_INFO "istallion: too many boards found, " "maximum supported %d\n", STL_MAXBRDS); mutex_unlock(&stli_brdslock); retval = -EIO; @@ -3920,7 +3834,7 @@ static struct stlibrd *stli_allocbrd(void) brdp = kzalloc(sizeof(struct stlibrd), GFP_KERNEL); if (!brdp) { - printk(KERN_ERR "STALLION: failed to allocate memory " + printk(KERN_ERR "istallion: failed to allocate memory " "(size=%Zd)\n", sizeof(struct stlibrd)); return NULL; } @@ -4518,6 +4432,11 @@ static const struct tty_operations stli_ops = { .tiocmset = stli_tiocmset, }; +static const struct tty_port_operations stli_port_ops = { + .carrier_raised = stli_carrier_raised, + .raise_dtr_rts = stli_raise_dtr_rts, +}; + /*****************************************************************************/ /* * Loadable module initialization stuff. @@ -4554,7 +4473,7 @@ static int __init istallion_module_init(void) stli_txcookbuf = kmalloc(STLI_TXBUFSIZE, GFP_KERNEL); if (!stli_txcookbuf) { - printk(KERN_ERR "STALLION: failed to allocate memory " + printk(KERN_ERR "istallion: failed to allocate memory " "(size=%d)\n", STLI_TXBUFSIZE); retval = -ENOMEM; goto err; @@ -4579,7 +4498,7 @@ static int __init istallion_module_init(void) retval = tty_register_driver(stli_serial); if (retval) { - printk(KERN_ERR "STALLION: failed to register serial driver\n"); + printk(KERN_ERR "istallion: failed to register serial driver\n"); goto err_ttyput; } @@ -4593,7 +4512,7 @@ static int __init istallion_module_init(void) */ retval = register_chrdev(STL_SIOMEMMAJOR, "staliomem", &stli_fsiomem); if (retval) { - printk(KERN_ERR "STALLION: failed to register serial memory " + printk(KERN_ERR "istallion: failed to register serial memory " "device\n"); goto err_deinit; } diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 12d327a2c9ba..8b0da97d5293 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -206,6 +206,7 @@ static void moxa_poll(unsigned long); static void moxa_set_tty_param(struct tty_struct *, struct ktermios *); static void moxa_setup_empty_event(struct tty_struct *); static void moxa_shut_down(struct tty_struct *); +static int moxa_carrier_raised(struct tty_port *); /* * moxa board interface functions: */ @@ -405,6 +406,10 @@ static const struct tty_operations moxa_ops = { .tiocmset = moxa_tiocmset, }; +static const struct tty_port_operations moxa_port_ops = { + .carrier_raised = moxa_carrier_raised, +}; + static struct tty_driver *moxaDriver; static DEFINE_TIMER(moxaTimer, moxa_poll, 0, 0); static DEFINE_SPINLOCK(moxa_lock); @@ -826,6 +831,7 @@ static int moxa_init_board(struct moxa_board_conf *brd, struct device *dev) for (i = 0, p = brd->ports; i < MAX_PORTS_PER_BOARD; i++, p++) { tty_port_init(&p->port); + p->port.ops = &moxa_port_ops; p->type = PORT_16550A; p->cflag = B9600 | CS8 | CREAD | CLOCAL | HUPCL; } @@ -1115,15 +1121,27 @@ static void moxa_close_port(struct tty_struct *tty) tty_port_tty_set(&ch->port, NULL); } +static int moxa_carrier_raised(struct tty_port *port) +{ + struct moxa_port *ch = container_of(port, struct moxa_port, port); + int dcd; + + spin_lock_bh(&moxa_lock); + dcd = ch->DCDState; + spin_unlock_bh(&moxa_lock); + return dcd; +} + static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp, struct moxa_port *ch) { + struct tty_port *port = &ch->port; DEFINE_WAIT(wait); int retval = 0; u8 dcd; while (1) { - prepare_to_wait(&ch->port.open_wait, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE); if (tty_hung_up_p(filp)) { #ifdef SERIAL_DO_RESTART retval = -ERESTARTSYS; @@ -1132,9 +1150,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp, #endif break; } - spin_lock_bh(&moxa_lock); - dcd = ch->DCDState; - spin_unlock_bh(&moxa_lock); + dcd = tty_port_carrier_raised(port); if (dcd) break; @@ -1144,7 +1160,7 @@ static int moxa_block_till_ready(struct tty_struct *tty, struct file *filp, } schedule(); } - finish_wait(&ch->port.open_wait, &wait); + finish_wait(&port->open_wait, &wait); return retval; } diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 047766915411..402c9f217f83 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -541,74 +541,21 @@ static unsigned char mxser_get_msr(int baseaddr, int mode, int port) return status; } -static int mxser_block_til_ready(struct tty_struct *tty, struct file *filp, - struct mxser_port *port) +static int mxser_carrier_raised(struct tty_port *port) { - DECLARE_WAITQUEUE(wait, current); - int retval; - int do_clocal = 0; - unsigned long flags; - - /* - * If non-blocking mode is set, or the port is not enabled, - * then make the check up front and then exit. - */ - if ((filp->f_flags & O_NONBLOCK) || - test_bit(TTY_IO_ERROR, &tty->flags)) { - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; - } + struct mxser_port *mp = container_of(port, struct mxser_port, port); + return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0; +} - if (tty->termios->c_cflag & CLOCAL) - do_clocal = 1; +static void mxser_raise_dtr_rts(struct tty_port *port) +{ + struct mxser_port *mp = container_of(port, struct mxser_port, port); + unsigned long flags; - /* - * Block waiting for the carrier detect and the line to become - * free (i.e., not in use by the callout). While we are in - * this loop, port->port.count is dropped by one, so that - * mxser_close() knows when to free things. We restore it upon - * exit, either normal or abnormal. - */ - retval = 0; - add_wait_queue(&port->port.open_wait, &wait); - - spin_lock_irqsave(&port->slock, flags); - if (!tty_hung_up_p(filp)) - port->port.count--; - spin_unlock_irqrestore(&port->slock, flags); - port->port.blocked_open++; - while (1) { - spin_lock_irqsave(&port->slock, flags); - outb(inb(port->ioaddr + UART_MCR) | - UART_MCR_DTR | UART_MCR_RTS, port->ioaddr + UART_MCR); - spin_unlock_irqrestore(&port->slock, flags); - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(port->port.flags & ASYNC_INITIALIZED)) { - if (port->port.flags & ASYNC_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - break; - } - if (!(port->port.flags & ASYNC_CLOSING) && - (do_clocal || - (inb(port->ioaddr + UART_MSR) & UART_MSR_DCD))) - break; - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - schedule(); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&port->port.open_wait, &wait); - if (!tty_hung_up_p(filp)) - port->port.count++; - port->port.blocked_open--; - if (retval) - return retval; - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; + spin_lock_irqsave(&mp->slock, flags); + outb(inb(mp->ioaddr + UART_MCR) | + UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + spin_unlock_irqrestore(&mp->slock, flags); } static int mxser_set_baud(struct tty_struct *tty, long newspd) @@ -1087,14 +1034,14 @@ static int mxser_open(struct tty_struct *tty, struct file *filp) /* * Start up serial port */ - spin_lock_irqsave(&info->slock, flags); + spin_lock_irqsave(&info->port.lock, flags); info->port.count++; - spin_unlock_irqrestore(&info->slock, flags); + spin_unlock_irqrestore(&info->port.lock, flags); retval = mxser_startup(tty); if (retval) return retval; - retval = mxser_block_til_ready(tty, filp, info); + retval = tty_port_block_til_ready(&info->port, tty, filp); if (retval) return retval; @@ -1133,58 +1080,27 @@ static void mxser_flush_buffer(struct tty_struct *tty) static void mxser_close(struct tty_struct *tty, struct file *filp) { struct mxser_port *info = tty->driver_data; + struct tty_port *port = &info->port; unsigned long timeout; - unsigned long flags; if (tty->index == MXSER_PORTS) return; if (!info) return; - spin_lock_irqsave(&info->slock, flags); - - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&info->slock, flags); - return; - } - if ((tty->count == 1) && (info->port.count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. Info->port.count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - printk(KERN_ERR "mxser_close: bad serial port count; " - "tty->count is 1, info->port.count is %d\n", info->port.count); - info->port.count = 1; - } - if (--info->port.count < 0) { - printk(KERN_ERR "mxser_close: bad serial port count for " - "ttys%d: %d\n", tty->index, info->port.count); - info->port.count = 0; - } - if (info->port.count) { - spin_unlock_irqrestore(&info->slock, flags); + if (tty_port_close_start(port, tty, filp) == 0) return; - } - info->port.flags |= ASYNC_CLOSING; - spin_unlock_irqrestore(&info->slock, flags); + /* * Save the termios structure, since this port may have * separate termios for callout and dialin. + * + * FIXME: Can this go ? */ if (info->port.flags & ASYNC_NORMAL_ACTIVE) info->normal_termios = *tty->termios; /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - tty->closing = 1; - if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, info->port.closing_wait); - /* * At this point we stop accepting input. To do this, we * disable the receive line status interrupts, and tell the * interrupt driver to stop checking the data ready bit in the @@ -1209,19 +1125,12 @@ static void mxser_close(struct tty_struct *tty, struct file *filp) } } mxser_shutdown(tty); - mxser_flush_buffer(tty); - tty_ldisc_flush(tty); - - tty->closing = 0; - tty_port_tty_set(&info->port, NULL); - if (info->port.blocked_open) { - if (info->port.close_delay) - schedule_timeout_interruptible(info->port.close_delay); - wake_up_interruptible(&info->port.open_wait); - } - info->port.flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING); + /* Right now the tty_port set is done outside of the close_end helper + as we don't yet have everyone using refcounts */ + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); } static int mxser_write(struct tty_struct *tty, const unsigned char *buf, int count) @@ -2146,10 +2055,7 @@ static void mxser_hangup(struct tty_struct *tty) mxser_flush_buffer(tty); mxser_shutdown(tty); - info->port.count = 0; - info->port.flags &= ~ASYNC_NORMAL_ACTIVE; - tty_port_tty_set(&info->port, NULL); - wake_up_interruptible(&info->port.open_wait); + tty_port_hangup(&info->port); } /* @@ -2449,6 +2355,11 @@ static const struct tty_operations mxser_ops = { .tiocmset = mxser_tiocmset, }; +struct tty_port_operations mxser_port_ops = { + .carrier_raised = mxser_carrier_raised, + .raise_dtr_rts = mxser_raise_dtr_rts, +}; + /* * The MOXA Smartio/Industio serial driver boot-time initialization code! */ @@ -2482,6 +2393,7 @@ static int __devinit mxser_initbrd(struct mxser_board *brd, for (i = 0; i < brd->info->nports; i++) { info = &brd->ports[i]; tty_port_init(&info->port); + info->port.ops = &mxser_port_ops; info->board = brd; info->stop_rx = 0; info->ldisc_stop_rx = 0; diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 4a8215a89ad3..d2e93e343226 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -1003,7 +1003,7 @@ static int r3964_open(struct tty_struct *tty) static void r3964_close(struct tty_struct *tty) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; struct r3964_client_info *pClient, *pNext; struct r3964_message *pMsg; struct r3964_block_header *pHeader, *pNextHeader; @@ -1058,7 +1058,7 @@ static void r3964_close(struct tty_struct *tty) static ssize_t r3964_read(struct tty_struct *tty, struct file *file, unsigned char __user * buf, size_t nr) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; struct r3964_client_info *pClient; struct r3964_message *pMsg; struct r3964_client_message theMsg; @@ -1113,7 +1113,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, static ssize_t r3964_write(struct tty_struct *tty, struct file *file, const unsigned char *data, size_t count) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; struct r3964_block_header *pHeader; struct r3964_client_info *pClient; unsigned char *new_data; @@ -1182,7 +1182,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file, static int r3964_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; if (pInfo == NULL) return -EINVAL; switch (cmd) { @@ -1216,7 +1216,7 @@ static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old) static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; struct r3964_client_info *pClient; struct r3964_message *pMsg = NULL; unsigned long flags; @@ -1241,7 +1241,7 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { - struct r3964_info *pInfo = (struct r3964_info *)tty->disc_data; + struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; char *f, flags = 0; int i; diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index efbfe9612658..f6f0e4ec2b51 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -47,8 +47,8 @@ #include <linux/bitops.h> #include <linux/audit.h> #include <linux/file.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/system.h> /* number of characters left in xmit buffer before select has we have room */ @@ -62,6 +62,17 @@ #define TTY_THRESHOLD_THROTTLE 128 /* now based on remaining room */ #define TTY_THRESHOLD_UNTHROTTLE 128 +/* + * Special byte codes used in the echo buffer to represent operations + * or special handling of characters. Bytes in the echo buffer that + * are not part of such special blocks are treated as normal character + * codes. + */ +#define ECHO_OP_START 0xff +#define ECHO_OP_MOVE_BACK_COL 0x80 +#define ECHO_OP_SET_CANON_COL 0x81 +#define ECHO_OP_ERASE_TAB 0x82 + static inline unsigned char *alloc_buf(void) { gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; @@ -169,6 +180,7 @@ static void check_unthrottle(struct tty_struct *tty) * * Locking: tty_read_lock for read fields. */ + static void reset_buffer_flags(struct tty_struct *tty) { unsigned long flags; @@ -176,6 +188,11 @@ static void reset_buffer_flags(struct tty_struct *tty) spin_lock_irqsave(&tty->read_lock, flags); tty->read_head = tty->read_tail = tty->read_cnt = 0; spin_unlock_irqrestore(&tty->read_lock, flags); + + mutex_lock(&tty->echo_lock); + tty->echo_pos = tty->echo_cnt = tty->echo_overrun = 0; + mutex_unlock(&tty->echo_lock); + tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); n_tty_set_room(tty); @@ -266,89 +283,118 @@ static inline int is_continuation(unsigned char c, struct tty_struct *tty) } /** - * opost - output post processor + * do_output_char - output one character * @c: character (or partial unicode symbol) * @tty: terminal device + * @space: space available in tty driver write buffer * - * Perform OPOST processing. Returns -1 when the output device is - * full and the character must be retried. Note that Linux currently - * ignores TABDLY, CRDLY, VTDLY, FFDLY and NLDLY. They simply aren't - * relevant in the world today. If you ever need them, add them here. + * This is a helper function that handles one output character + * (including special characters like TAB, CR, LF, etc.), + * putting the results in the tty driver's write buffer. + * + * Note that Linux currently ignores TABDLY, CRDLY, VTDLY, FFDLY + * and NLDLY. They simply aren't relevant in the world today. + * If you ever need them, add them here. + * + * Returns the number of bytes of buffer space used or -1 if + * no space left. * - * Called from both the receive and transmit sides and can be called - * re-entrantly. Relies on lock_kernel() for tty->column state. + * Locking: should be called under the output_lock to protect + * the column state and space left in the buffer */ -static int opost(unsigned char c, struct tty_struct *tty) +static int do_output_char(unsigned char c, struct tty_struct *tty, int space) { - int space, spaces; + int spaces; - space = tty_write_room(tty); if (!space) return -1; - lock_kernel(); - if (O_OPOST(tty)) { - switch (c) { - case '\n': - if (O_ONLRET(tty)) - tty->column = 0; - if (O_ONLCR(tty)) { - if (space < 2) { - unlock_kernel(); - return -1; - } - tty_put_char(tty, '\r'); - tty->column = 0; - } - tty->canon_column = tty->column; - break; - case '\r': - if (O_ONOCR(tty) && tty->column == 0) { - unlock_kernel(); - return 0; - } - if (O_OCRNL(tty)) { - c = '\n'; - if (O_ONLRET(tty)) - tty->canon_column = tty->column = 0; - break; - } + switch (c) { + case '\n': + if (O_ONLRET(tty)) + tty->column = 0; + if (O_ONLCR(tty)) { + if (space < 2) + return -1; tty->canon_column = tty->column = 0; + tty_put_char(tty, '\r'); + tty_put_char(tty, c); + return 2; + } + tty->canon_column = tty->column; + break; + case '\r': + if (O_ONOCR(tty) && tty->column == 0) + return 0; + if (O_OCRNL(tty)) { + c = '\n'; + if (O_ONLRET(tty)) + tty->canon_column = tty->column = 0; break; - case '\t': - spaces = 8 - (tty->column & 7); - if (O_TABDLY(tty) == XTABS) { - if (space < spaces) { - unlock_kernel(); - return -1; - } - tty->column += spaces; - tty->ops->write(tty, " ", spaces); - unlock_kernel(); - return 0; - } + } + tty->canon_column = tty->column = 0; + break; + case '\t': + spaces = 8 - (tty->column & 7); + if (O_TABDLY(tty) == XTABS) { + if (space < spaces) + return -1; tty->column += spaces; - break; - case '\b': - if (tty->column > 0) - tty->column--; - break; - default: + tty->ops->write(tty, " ", spaces); + return spaces; + } + tty->column += spaces; + break; + case '\b': + if (tty->column > 0) + tty->column--; + break; + default: + if (!iscntrl(c)) { if (O_OLCUC(tty)) c = toupper(c); - if (!iscntrl(c) && !is_continuation(c, tty)) + if (!is_continuation(c, tty)) tty->column++; - break; } + break; } + tty_put_char(tty, c); - unlock_kernel(); - return 0; + return 1; } /** - * opost_block - block postprocess + * process_output - output post processor + * @c: character (or partial unicode symbol) + * @tty: terminal device + * + * Perform OPOST processing. Returns -1 when the output device is + * full and the character must be retried. + * + * Locking: output_lock to protect column state and space left + * (also, this is called from n_tty_write under the + * tty layer write lock) + */ + +static int process_output(unsigned char c, struct tty_struct *tty) +{ + int space, retval; + + mutex_lock(&tty->output_lock); + + space = tty_write_room(tty); + retval = do_output_char(c, tty, space); + + mutex_unlock(&tty->output_lock); + if (retval < 0) + return -1; + else + return 0; +} + +/** + * process_output_block - block post processor * @tty: terminal device * @inbuf: user buffer * @nr: number of bytes @@ -358,26 +404,32 @@ static int opost(unsigned char c, struct tty_struct *tty) * the simple cases normally found and helps to generate blocks of * symbols for the console driver and thus improve performance. * - * Called from n_tty_write under the tty layer write lock. Relies - * on lock_kernel for the tty->column state. + * Locking: output_lock to protect column state and space left + * (also, this is called from n_tty_write under the + * tty layer write lock) */ -static ssize_t opost_block(struct tty_struct *tty, - const unsigned char *buf, unsigned int nr) +static ssize_t process_output_block(struct tty_struct *tty, + const unsigned char *buf, unsigned int nr) { int space; int i; const unsigned char *cp; + mutex_lock(&tty->output_lock); + space = tty_write_room(tty); - if (!space) + if (!space) { + mutex_unlock(&tty->output_lock); return 0; + } if (nr > space) nr = space; - lock_kernel(); for (i = 0, cp = buf; i < nr; i++, cp++) { - switch (*cp) { + unsigned char c = *cp; + + switch (c) { case '\n': if (O_ONLRET(tty)) tty->column = 0; @@ -399,54 +451,403 @@ static ssize_t opost_block(struct tty_struct *tty, tty->column--; break; default: - if (O_OLCUC(tty)) - goto break_out; - if (!iscntrl(*cp)) - tty->column++; + if (!iscntrl(c)) { + if (O_OLCUC(tty)) + goto break_out; + if (!is_continuation(c, tty)) + tty->column++; + } break; } } break_out: - if (tty->ops->flush_chars) - tty->ops->flush_chars(tty); i = tty->ops->write(tty, buf, i); - unlock_kernel(); + + mutex_unlock(&tty->output_lock); return i; } +/** + * process_echoes - write pending echo characters + * @tty: terminal device + * + * Write previously buffered echo (and other ldisc-generated) + * characters to the tty. + * + * Characters generated by the ldisc (including echoes) need to + * be buffered because the driver's write buffer can fill during + * heavy program output. Echoing straight to the driver will + * often fail under these conditions, causing lost characters and + * resulting mismatches of ldisc state information. + * + * Since the ldisc state must represent the characters actually sent + * to the driver at the time of the write, operations like certain + * changes in column state are also saved in the buffer and executed + * here. + * + * A circular fifo buffer is used so that the most recent characters + * are prioritized. Also, when control characters are echoed with a + * prefixed "^", the pair is treated atomically and thus not separated. + * + * Locking: output_lock to protect column state and space left, + * echo_lock to protect the echo buffer + */ + +static void process_echoes(struct tty_struct *tty) +{ + int space, nr; + unsigned char c; + unsigned char *cp, *buf_end; + + if (!tty->echo_cnt) + return; + + mutex_lock(&tty->output_lock); + mutex_lock(&tty->echo_lock); + + space = tty_write_room(tty); + + buf_end = tty->echo_buf + N_TTY_BUF_SIZE; + cp = tty->echo_buf + tty->echo_pos; + nr = tty->echo_cnt; + while (nr > 0) { + c = *cp; + if (c == ECHO_OP_START) { + unsigned char op; + unsigned char *opp; + int no_space_left = 0; + + /* + * If the buffer byte is the start of a multi-byte + * operation, get the next byte, which is either the + * op code or a control character value. + */ + opp = cp + 1; + if (opp == buf_end) + opp -= N_TTY_BUF_SIZE; + op = *opp; + + switch (op) { + unsigned int num_chars, num_bs; + + case ECHO_OP_ERASE_TAB: + if (++opp == buf_end) + opp -= N_TTY_BUF_SIZE; + num_chars = *opp; + + /* + * Determine how many columns to go back + * in order to erase the tab. + * This depends on the number of columns + * used by other characters within the tab + * area. If this (modulo 8) count is from + * the start of input rather than from a + * previous tab, we offset by canon column. + * Otherwise, tab spacing is normal. + */ + if (!(num_chars & 0x80)) + num_chars += tty->canon_column; + num_bs = 8 - (num_chars & 7); + + if (num_bs > space) { + no_space_left = 1; + break; + } + space -= num_bs; + while (num_bs--) { + tty_put_char(tty, '\b'); + if (tty->column > 0) + tty->column--; + } + cp += 3; + nr -= 3; + break; + + case ECHO_OP_SET_CANON_COL: + tty->canon_column = tty->column; + cp += 2; + nr -= 2; + break; + + case ECHO_OP_MOVE_BACK_COL: + if (tty->column > 0) + tty->column--; + cp += 2; + nr -= 2; + break; + + case ECHO_OP_START: + /* This is an escaped echo op start code */ + if (!space) { + no_space_left = 1; + break; + } + tty_put_char(tty, ECHO_OP_START); + tty->column++; + space--; + cp += 2; + nr -= 2; + break; + + default: + if (iscntrl(op)) { + if (L_ECHOCTL(tty)) { + /* + * Ensure there is enough space + * for the whole ctrl pair. + */ + if (space < 2) { + no_space_left = 1; + break; + } + tty_put_char(tty, '^'); + tty_put_char(tty, op ^ 0100); + tty->column += 2; + space -= 2; + } else { + if (!space) { + no_space_left = 1; + break; + } + tty_put_char(tty, op); + space--; + } + } + /* + * If above falls through, this was an + * undefined op. + */ + cp += 2; + nr -= 2; + } + + if (no_space_left) + break; + } else { + int retval; + + retval = do_output_char(c, tty, space); + if (retval < 0) + break; + space -= retval; + cp += 1; + nr -= 1; + } + + /* When end of circular buffer reached, wrap around */ + if (cp >= buf_end) + cp -= N_TTY_BUF_SIZE; + } + + if (nr == 0) { + tty->echo_pos = 0; + tty->echo_cnt = 0; + tty->echo_overrun = 0; + } else { + int num_processed = tty->echo_cnt - nr; + tty->echo_pos += num_processed; + tty->echo_pos &= N_TTY_BUF_SIZE - 1; + tty->echo_cnt = nr; + if (num_processed > 0) + tty->echo_overrun = 0; + } + + mutex_unlock(&tty->echo_lock); + mutex_unlock(&tty->output_lock); + + if (tty->ops->flush_chars) + tty->ops->flush_chars(tty); +} /** - * echo_char - echo characters + * add_echo_byte - add a byte to the echo buffer + * @c: unicode byte to echo + * @tty: terminal device + * + * Add a character or operation byte to the echo buffer. + * + * Should be called under the echo lock to protect the echo buffer. + */ + +static void add_echo_byte(unsigned char c, struct tty_struct *tty) +{ + int new_byte_pos; + + if (tty->echo_cnt == N_TTY_BUF_SIZE) { + /* Circular buffer is already at capacity */ + new_byte_pos = tty->echo_pos; + + /* + * Since the buffer start position needs to be advanced, + * be sure to step by a whole operation byte group. + */ + if (tty->echo_buf[tty->echo_pos] == ECHO_OP_START) { + if (tty->echo_buf[(tty->echo_pos + 1) & + (N_TTY_BUF_SIZE - 1)] == + ECHO_OP_ERASE_TAB) { + tty->echo_pos += 3; + tty->echo_cnt -= 2; + } else { + tty->echo_pos += 2; + tty->echo_cnt -= 1; + } + } else { + tty->echo_pos++; + } + tty->echo_pos &= N_TTY_BUF_SIZE - 1; + + tty->echo_overrun = 1; + } else { + new_byte_pos = tty->echo_pos + tty->echo_cnt; + new_byte_pos &= N_TTY_BUF_SIZE - 1; + tty->echo_cnt++; + } + + tty->echo_buf[new_byte_pos] = c; +} + +/** + * echo_move_back_col - add operation to move back a column + * @tty: terminal device + * + * Add an operation to the echo buffer to move back one column. + * + * Locking: echo_lock to protect the echo buffer + */ + +static void echo_move_back_col(struct tty_struct *tty) +{ + mutex_lock(&tty->echo_lock); + + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(ECHO_OP_MOVE_BACK_COL, tty); + + mutex_unlock(&tty->echo_lock); +} + +/** + * echo_set_canon_col - add operation to set the canon column + * @tty: terminal device + * + * Add an operation to the echo buffer to set the canon column + * to the current column. + * + * Locking: echo_lock to protect the echo buffer + */ + +static void echo_set_canon_col(struct tty_struct *tty) +{ + mutex_lock(&tty->echo_lock); + + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(ECHO_OP_SET_CANON_COL, tty); + + mutex_unlock(&tty->echo_lock); +} + +/** + * echo_erase_tab - add operation to erase a tab + * @num_chars: number of character columns already used + * @after_tab: true if num_chars starts after a previous tab + * @tty: terminal device + * + * Add an operation to the echo buffer to erase a tab. + * + * Called by the eraser function, which knows how many character + * columns have been used since either a previous tab or the start + * of input. This information will be used later, along with + * canon column (if applicable), to go back the correct number + * of columns. + * + * Locking: echo_lock to protect the echo buffer + */ + +static void echo_erase_tab(unsigned int num_chars, int after_tab, + struct tty_struct *tty) +{ + mutex_lock(&tty->echo_lock); + + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(ECHO_OP_ERASE_TAB, tty); + + /* We only need to know this modulo 8 (tab spacing) */ + num_chars &= 7; + + /* Set the high bit as a flag if num_chars is after a previous tab */ + if (after_tab) + num_chars |= 0x80; + + add_echo_byte(num_chars, tty); + + mutex_unlock(&tty->echo_lock); +} + +/** + * echo_char_raw - echo a character raw * @c: unicode byte to echo * @tty: terminal device * * Echo user input back onto the screen. This must be called only when * L_ECHO(tty) is true. Called from the driver receive_buf path. * - * Relies on BKL for tty column locking + * This variant does not treat control characters specially. + * + * Locking: echo_lock to protect the echo buffer + */ + +static void echo_char_raw(unsigned char c, struct tty_struct *tty) +{ + mutex_lock(&tty->echo_lock); + + if (c == ECHO_OP_START) { + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(ECHO_OP_START, tty); + } else { + add_echo_byte(c, tty); + } + + mutex_unlock(&tty->echo_lock); +} + +/** + * echo_char - echo a character + * @c: unicode byte to echo + * @tty: terminal device + * + * Echo user input back onto the screen. This must be called only when + * L_ECHO(tty) is true. Called from the driver receive_buf path. + * + * This variant tags control characters to be possibly echoed as + * as "^X" (where X is the letter representing the control char). + * + * Locking: echo_lock to protect the echo buffer */ static void echo_char(unsigned char c, struct tty_struct *tty) { - if (L_ECHOCTL(tty) && iscntrl(c) && c != '\t') { - tty_put_char(tty, '^'); - tty_put_char(tty, c ^ 0100); - tty->column += 2; - } else - opost(c, tty); + mutex_lock(&tty->echo_lock); + + if (c == ECHO_OP_START) { + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(ECHO_OP_START, tty); + } else { + if (iscntrl(c) && c != '\t') + add_echo_byte(ECHO_OP_START, tty); + add_echo_byte(c, tty); + } + + mutex_unlock(&tty->echo_lock); } /** - * finsh_erasing - complete erase + * finish_erasing - complete erase * @tty: tty doing the erase - * - * Relies on BKL for tty column locking */ + static inline void finish_erasing(struct tty_struct *tty) { if (tty->erasing) { - tty_put_char(tty, '/'); - tty->column++; + echo_char_raw('/', tty); tty->erasing = 0; } } @@ -460,7 +861,7 @@ static inline void finish_erasing(struct tty_struct *tty) * present in the stream from the driver layer. Handles the complexities * of UTF-8 multibyte symbols. * - * Locking: read_lock for tty buffers, BKL for column/erasing state + * Locking: read_lock for tty buffers */ static void eraser(unsigned char c, struct tty_struct *tty) @@ -471,7 +872,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) /* FIXME: locking needed ? */ if (tty->read_head == tty->canon_head) { - /* opost('\a', tty); */ /* what do you think? */ + /* process_output('\a', tty); */ /* what do you think? */ return; } if (c == ERASE_CHAR(tty)) @@ -497,7 +898,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) echo_char(KILL_CHAR(tty), tty); /* Add a newline if ECHOK is on and ECHOKE is off. */ if (L_ECHOK(tty)) - opost('\n', tty); + echo_char_raw('\n', tty); return; } kill_type = KILL; @@ -533,67 +934,61 @@ static void eraser(unsigned char c, struct tty_struct *tty) if (L_ECHO(tty)) { if (L_ECHOPRT(tty)) { if (!tty->erasing) { - tty_put_char(tty, '\\'); - tty->column++; + echo_char_raw('\\', tty); tty->erasing = 1; } /* if cnt > 1, output a multi-byte character */ echo_char(c, tty); while (--cnt > 0) { head = (head+1) & (N_TTY_BUF_SIZE-1); - tty_put_char(tty, tty->read_buf[head]); + echo_char_raw(tty->read_buf[head], tty); + echo_move_back_col(tty); } } else if (kill_type == ERASE && !L_ECHOE(tty)) { echo_char(ERASE_CHAR(tty), tty); } else if (c == '\t') { - unsigned int col = tty->canon_column; - unsigned long tail = tty->canon_head; - - /* Find the column of the last char. */ - while (tail != tty->read_head) { + unsigned int num_chars = 0; + int after_tab = 0; + unsigned long tail = tty->read_head; + + /* + * Count the columns used for characters + * since the start of input or after a + * previous tab. + * This info is used to go back the correct + * number of columns. + */ + while (tail != tty->canon_head) { + tail = (tail-1) & (N_TTY_BUF_SIZE-1); c = tty->read_buf[tail]; - if (c == '\t') - col = (col | 7) + 1; - else if (iscntrl(c)) { + if (c == '\t') { + after_tab = 1; + break; + } else if (iscntrl(c)) { if (L_ECHOCTL(tty)) - col += 2; - } else if (!is_continuation(c, tty)) - col++; - tail = (tail+1) & (N_TTY_BUF_SIZE-1); - } - - /* should never happen */ - if (tty->column > 0x80000000) - tty->column = 0; - - /* Now backup to that column. */ - while (tty->column > col) { - /* Can't use opost here. */ - tty_put_char(tty, '\b'); - if (tty->column > 0) - tty->column--; + num_chars += 2; + } else if (!is_continuation(c, tty)) { + num_chars++; + } } + echo_erase_tab(num_chars, after_tab, tty); } else { if (iscntrl(c) && L_ECHOCTL(tty)) { - tty_put_char(tty, '\b'); - tty_put_char(tty, ' '); - tty_put_char(tty, '\b'); - if (tty->column > 0) - tty->column--; + echo_char_raw('\b', tty); + echo_char_raw(' ', tty); + echo_char_raw('\b', tty); } if (!iscntrl(c) || L_ECHOCTL(tty)) { - tty_put_char(tty, '\b'); - tty_put_char(tty, ' '); - tty_put_char(tty, '\b'); - if (tty->column > 0) - tty->column--; + echo_char_raw('\b', tty); + echo_char_raw(' ', tty); + echo_char_raw('\b', tty); } } } if (kill_type == ERASE) break; } - if (tty->read_head == tty->canon_head) + if (tty->read_head == tty->canon_head && L_ECHO(tty)) finish_erasing(tty); } @@ -712,6 +1107,7 @@ static inline void n_tty_receive_parity_error(struct tty_struct *tty, static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) { unsigned long flags; + int parmrk; if (tty->raw) { put_tty_queue(c, tty); @@ -721,18 +1117,21 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) if (I_ISTRIP(tty)) c &= 0x7f; if (I_IUCLC(tty) && L_IEXTEN(tty)) - c=tolower(c); + c = tolower(c); if (tty->stopped && !tty->flow_stopped && I_IXON(tty) && - ((I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty)) || - c == INTR_CHAR(tty) || c == QUIT_CHAR(tty) || c == SUSP_CHAR(tty))) + I_IXANY(tty) && c != START_CHAR(tty) && c != STOP_CHAR(tty) && + c != INTR_CHAR(tty) && c != QUIT_CHAR(tty) && c != SUSP_CHAR(tty)) { start_tty(tty); + process_echoes(tty); + } if (tty->closing) { if (I_IXON(tty)) { - if (c == START_CHAR(tty)) + if (c == START_CHAR(tty)) { start_tty(tty); - else if (c == STOP_CHAR(tty)) + process_echoes(tty); + } else if (c == STOP_CHAR(tty)) stop_tty(tty); } return; @@ -745,19 +1144,23 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) * up. */ if (!test_bit(c, tty->process_char_map) || tty->lnext) { - finish_erasing(tty); tty->lnext = 0; + parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0; + if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) { + /* beep if no space */ + if (L_ECHO(tty)) + process_output('\a', tty); + return; + } if (L_ECHO(tty)) { - if (tty->read_cnt >= N_TTY_BUF_SIZE-1) { - tty_put_char(tty, '\a'); /* beep if no space */ - return; - } + finish_erasing(tty); /* Record the column of first canon char. */ if (tty->canon_head == tty->read_head) - tty->canon_column = tty->column; + echo_set_canon_col(tty); echo_char(c, tty); + process_echoes(tty); } - if (I_PARMRK(tty) && c == (unsigned char) '\377') + if (parmrk) put_tty_queue(c, tty); put_tty_queue(c, tty); return; @@ -766,6 +1169,7 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) if (I_IXON(tty)) { if (c == START_CHAR(tty)) { start_tty(tty); + process_echoes(tty); return; } if (c == STOP_CHAR(tty)) { @@ -786,7 +1190,6 @@ static inline void n_tty_receive_char(struct tty_struct *tty, unsigned char c) if (c == SUSP_CHAR(tty)) { send_signal: /* - * Echo character, and then send the signal. * Note that we do not use isig() here because we want * the order to be: * 1) flush, 2) echo, 3) signal @@ -795,8 +1198,12 @@ send_signal: n_tty_flush_buffer(tty); tty_driver_flush_buffer(tty); } - if (L_ECHO(tty)) + if (I_IXON(tty)) + start_tty(tty); + if (L_ECHO(tty)) { echo_char(c, tty); + process_echoes(tty); + } if (tty->pgrp) kill_pgrp(tty->pgrp, signal, 1); return; @@ -815,6 +1222,7 @@ send_signal: if (c == ERASE_CHAR(tty) || c == KILL_CHAR(tty) || (c == WERASE_CHAR(tty) && L_IEXTEN(tty))) { eraser(c, tty); + process_echoes(tty); return; } if (c == LNEXT_CHAR(tty) && L_IEXTEN(tty)) { @@ -822,8 +1230,9 @@ send_signal: if (L_ECHO(tty)) { finish_erasing(tty); if (L_ECHOCTL(tty)) { - tty_put_char(tty, '^'); - tty_put_char(tty, '\b'); + echo_char_raw('^', tty); + echo_char_raw('\b', tty); + process_echoes(tty); } } return; @@ -834,22 +1243,29 @@ send_signal: finish_erasing(tty); echo_char(c, tty); - opost('\n', tty); + echo_char_raw('\n', tty); while (tail != tty->read_head) { echo_char(tty->read_buf[tail], tty); tail = (tail+1) & (N_TTY_BUF_SIZE-1); } + process_echoes(tty); return; } if (c == '\n') { + if (tty->read_cnt >= N_TTY_BUF_SIZE) { + if (L_ECHO(tty)) + process_output('\a', tty); + return; + } if (L_ECHO(tty) || L_ECHONL(tty)) { - if (tty->read_cnt >= N_TTY_BUF_SIZE-1) - tty_put_char(tty, '\a'); - opost('\n', tty); + echo_char_raw('\n', tty); + process_echoes(tty); } goto handle_newline; } if (c == EOF_CHAR(tty)) { + if (tty->read_cnt >= N_TTY_BUF_SIZE) + return; if (tty->canon_head != tty->read_head) set_bit(TTY_PUSH, &tty->flags); c = __DISABLED_CHAR; @@ -857,22 +1273,28 @@ send_signal: } if ((c == EOL_CHAR(tty)) || (c == EOL2_CHAR(tty) && L_IEXTEN(tty))) { + parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) + ? 1 : 0; + if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk)) { + if (L_ECHO(tty)) + process_output('\a', tty); + return; + } /* * XXX are EOL_CHAR and EOL2_CHAR echoed?!? */ if (L_ECHO(tty)) { - if (tty->read_cnt >= N_TTY_BUF_SIZE-1) - tty_put_char(tty, '\a'); /* Record the column of first canon char. */ if (tty->canon_head == tty->read_head) - tty->canon_column = tty->column; + echo_set_canon_col(tty); echo_char(c, tty); + process_echoes(tty); } /* * XXX does PARMRK doubling happen for * EOL_CHAR and EOL2_CHAR? */ - if (I_PARMRK(tty) && c == (unsigned char) '\377') + if (parmrk) put_tty_queue(c, tty); handle_newline: @@ -889,23 +1311,27 @@ handle_newline: } } - finish_erasing(tty); + parmrk = (c == (unsigned char) '\377' && I_PARMRK(tty)) ? 1 : 0; + if (tty->read_cnt >= (N_TTY_BUF_SIZE - parmrk - 1)) { + /* beep if no space */ + if (L_ECHO(tty)) + process_output('\a', tty); + return; + } if (L_ECHO(tty)) { - if (tty->read_cnt >= N_TTY_BUF_SIZE-1) { - tty_put_char(tty, '\a'); /* beep if no space */ - return; - } + finish_erasing(tty); if (c == '\n') - opost('\n', tty); + echo_char_raw('\n', tty); else { /* Record the column of first canon char. */ if (tty->canon_head == tty->read_head) - tty->canon_column = tty->column; + echo_set_canon_col(tty); echo_char(c, tty); } + process_echoes(tty); } - if (I_PARMRK(tty) && c == (unsigned char) '\377') + if (parmrk) put_tty_queue(c, tty); put_tty_queue(c, tty); @@ -923,10 +1349,11 @@ handle_newline: static void n_tty_write_wakeup(struct tty_struct *tty) { - if (tty->fasync) { - set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + /* Write out any echoed characters that are still pending */ + process_echoes(tty); + + if (tty->fasync && test_and_clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) kill_fasync(&tty->fasync, SIGIO, POLL_OUT); - } } /** @@ -1134,6 +1561,10 @@ static void n_tty_close(struct tty_struct *tty) free_buf(tty->read_buf); tty->read_buf = NULL; } + if (tty->echo_buf) { + free_buf(tty->echo_buf); + tty->echo_buf = NULL; + } } /** @@ -1151,13 +1582,19 @@ static int n_tty_open(struct tty_struct *tty) if (!tty) return -EINVAL; - /* This one is ugly. Currently a malloc failure here can panic */ + /* These are ugly. Currently a malloc failure here can panic */ if (!tty->read_buf) { tty->read_buf = alloc_buf(); if (!tty->read_buf) return -ENOMEM; } + if (!tty->echo_buf) { + tty->echo_buf = alloc_buf(); + if (!tty->echo_buf) + return -ENOMEM; + } memset(tty->read_buf, 0, N_TTY_BUF_SIZE); + memset(tty->echo_buf, 0, N_TTY_BUF_SIZE); reset_buffer_flags(tty); tty->column = 0; n_tty_set_termios(tty, NULL); @@ -1487,16 +1924,23 @@ do_it_again: * @buf: userspace buffer pointer * @nr: size of I/O * - * Write function of the terminal device. This is serialized with + * Write function of the terminal device. This is serialized with * respect to other write callers but not to termios changes, reads - * and other such events. We must be careful with N_TTY as the receive - * code will echo characters, thus calling driver write methods. + * and other such events. Since the receive code will echo characters, + * thus calling driver write methods, the output_lock is used in + * the output processing functions called here as well as in the + * echo processing function to protect the column state and space + * left in the buffer. * * This code must be sure never to sleep through a hangup. + * + * Locking: output_lock to protect column state and space left + * (note that the process_output*() functions take this + * lock themselves) */ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, - const unsigned char *buf, size_t nr) + const unsigned char *buf, size_t nr) { const unsigned char *b = buf; DECLARE_WAITQUEUE(wait, current); @@ -1510,6 +1954,9 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, return retval; } + /* Write out any echoed characters that are still pending */ + process_echoes(tty); + add_wait_queue(&tty->write_wait, &wait); while (1) { set_current_state(TASK_INTERRUPTIBLE); @@ -1523,7 +1970,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, } if (O_OPOST(tty) && !(test_bit(TTY_HW_COOK_OUT, &tty->flags))) { while (nr > 0) { - ssize_t num = opost_block(tty, b, nr); + ssize_t num = process_output_block(tty, b, nr); if (num < 0) { if (num == -EAGAIN) break; @@ -1535,7 +1982,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, if (nr == 0) break; c = *b; - if (opost(c, tty) < 0) + if (process_output(c, tty) < 0) break; b++; nr--; } @@ -1565,6 +2012,8 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, break_out: __set_current_state(TASK_RUNNING); remove_wait_queue(&tty->write_wait, &wait); + if (b - buf != nr && tty->fasync) + set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); return (b - buf) ? b - buf : retval; } @@ -1663,4 +2112,3 @@ struct tty_ldisc_ops tty_ldisc_N_TTY = { .receive_buf = n_tty_receive_buf, .write_wakeup = n_tty_write_wakeup }; - diff --git a/drivers/char/nozomi.c b/drivers/char/nozomi.c index 9a34a1935283..d6102b644b55 100644 --- a/drivers/char/nozomi.c +++ b/drivers/char/nozomi.c @@ -353,6 +353,7 @@ struct ctrl_ul { /* This holds all information that is needed regarding a port */ struct port { + struct tty_port port; u8 update_flow_control; struct ctrl_ul ctrl_ul; struct ctrl_dl ctrl_dl; @@ -365,8 +366,6 @@ struct port { u8 toggle_ul; u16 token_dl; - struct tty_struct *tty; - int tty_open_count; /* mutex to ensure one access patch to this port */ struct mutex tty_sem; wait_queue_head_t tty_wait; @@ -788,14 +787,14 @@ static void disable_transmit_dl(enum port_type port, struct nozomi *dc) * Return 1 - send buffer to card and ack. * Return 0 - don't ack, don't send buffer to card. */ -static int send_data(enum port_type index, const struct nozomi *dc) +static int send_data(enum port_type index, struct nozomi *dc) { u32 size = 0; - const struct port *port = &dc->port[index]; + struct port *port = &dc->port[index]; const u8 toggle = port->toggle_ul; void __iomem *addr = port->ul_addr[toggle]; const u32 ul_size = port->ul_size[toggle]; - struct tty_struct *tty = port->tty; + struct tty_struct *tty = tty_port_tty_get(&port->port); /* Get data from tty and place in buf for now */ size = __kfifo_get(port->fifo_ul, dc->send_buf, @@ -803,6 +802,7 @@ static int send_data(enum port_type index, const struct nozomi *dc) if (size == 0) { DBG4("No more data to send, disable link:"); + tty_kref_put(tty); return 0; } @@ -815,6 +815,7 @@ static int send_data(enum port_type index, const struct nozomi *dc) if (tty) tty_wakeup(tty); + tty_kref_put(tty); return 1; } @@ -826,7 +827,7 @@ static int receive_data(enum port_type index, struct nozomi *dc) u32 offset = 4; struct port *port = &dc->port[index]; void __iomem *addr = port->dl_addr[port->toggle_dl]; - struct tty_struct *tty = port->tty; + struct tty_struct *tty = tty_port_tty_get(&port->port); int i; if (unlikely(!tty)) { @@ -870,7 +871,7 @@ static int receive_data(enum port_type index, struct nozomi *dc) } set_bit(index, &dc->flip); - + tty_kref_put(tty); return 1; } @@ -1276,9 +1277,15 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id) exit_handler: spin_unlock(&dc->spin_mutex); - for (a = 0; a < NOZOMI_MAX_PORTS; a++) - if (test_and_clear_bit(a, &dc->flip)) - tty_flip_buffer_push(dc->port[a].tty); + for (a = 0; a < NOZOMI_MAX_PORTS; a++) { + struct tty_struct *tty; + if (test_and_clear_bit(a, &dc->flip)) { + tty = tty_port_tty_get(&dc->port[a].port); + if (tty) + tty_flip_buffer_push(tty); + tty_kref_put(tty); + } + } return IRQ_HANDLED; none: spin_unlock(&dc->spin_mutex); @@ -1453,12 +1460,10 @@ static int __devinit nozomi_card_init(struct pci_dev *pdev, for (i = 0; i < MAX_PORT; i++) { mutex_init(&dc->port[i].tty_sem); - dc->port[i].tty_open_count = 0; - dc->port[i].tty = NULL; + tty_port_init(&dc->port[i].port); tty_register_device(ntty_driver, dc->index_start + i, &pdev->dev); } - return 0; err_free_sbuf: @@ -1482,14 +1487,16 @@ static void __devexit tty_exit(struct nozomi *dc) flush_scheduled_work(); - for (i = 0; i < MAX_PORT; ++i) - if (dc->port[i].tty && \ - list_empty(&dc->port[i].tty->hangup_work.entry)) - tty_hangup(dc->port[i].tty); - + for (i = 0; i < MAX_PORT; ++i) { + struct tty_struct *tty = tty_port_tty_get(&dc->port[i].port); + if (tty && list_empty(&tty->hangup_work.entry)) + tty_hangup(tty); + tty_kref_put(tty); + } + /* Racy below - surely should wait for scheduled work to be done or + complete off a hangup method ? */ while (dc->open_ttys) msleep(1); - for (i = dc->index_start; i < dc->index_start + MAX_PORT; ++i) tty_unregister_device(ntty_driver, i); } @@ -1579,23 +1586,22 @@ static int ntty_open(struct tty_struct *tty, struct file *file) if (mutex_lock_interruptible(&port->tty_sem)) return -ERESTARTSYS; - port->tty_open_count++; + port->port.count++; dc->open_ttys++; /* Enable interrupt downlink for channel */ - if (port->tty_open_count == 1) { + if (port->port.count == 1) { + /* FIXME: is this needed now ? */ tty->low_latency = 1; tty->driver_data = port; - port->tty = tty; + tty_port_tty_set(&port->port, tty); DBG1("open: %d", port->token_dl); spin_lock_irqsave(&dc->spin_mutex, flags); dc->last_ier = dc->last_ier | port->token_dl; writew(dc->last_ier, dc->reg_ier); spin_unlock_irqrestore(&dc->spin_mutex, flags); } - mutex_unlock(&port->tty_sem); - return 0; } @@ -1606,31 +1612,30 @@ static int ntty_open(struct tty_struct *tty, struct file *file) static void ntty_close(struct tty_struct *tty, struct file *file) { struct nozomi *dc = get_dc_by_tty(tty); - struct port *port = tty->driver_data; + struct port *nport = tty->driver_data; + struct tty_port *port = &nport->port; unsigned long flags; - if (!dc || !port) + if (!dc || !nport) return; - if (mutex_lock_interruptible(&port->tty_sem)) - return; + /* Users cannot interrupt a close */ + mutex_lock(&nport->tty_sem); - if (!port->tty_open_count) - goto exit; + WARN_ON(!port->count); dc->open_ttys--; - port->tty_open_count--; + port->count--; + tty_port_tty_set(port, NULL); - if (port->tty_open_count == 0) { - DBG1("close: %d", port->token_dl); + if (port->count == 0) { + DBG1("close: %d", nport->token_dl); spin_lock_irqsave(&dc->spin_mutex, flags); - dc->last_ier &= ~(port->token_dl); + dc->last_ier &= ~(nport->token_dl); writew(dc->last_ier, dc->reg_ier); spin_unlock_irqrestore(&dc->spin_mutex, flags); } - -exit: - mutex_unlock(&port->tty_sem); + mutex_unlock(&nport->tty_sem); } /* @@ -1660,7 +1665,7 @@ static int ntty_write(struct tty_struct *tty, const unsigned char *buffer, return -EAGAIN; } - if (unlikely(!port->tty_open_count)) { + if (unlikely(!port->port.count)) { DBG1(" "); goto exit; } @@ -1710,7 +1715,7 @@ static int ntty_write_room(struct tty_struct *tty) if (!mutex_trylock(&port->tty_sem)) return 0; - if (!port->tty_open_count) + if (!port->port.count) goto exit; room = port->fifo_ul->size - __kfifo_len(port->fifo_ul); @@ -1866,7 +1871,7 @@ static s32 ntty_chars_in_buffer(struct tty_struct *tty) goto exit_in_buffer; } - if (unlikely(!port->tty_open_count)) { + if (unlikely(!port->port.count)) { dev_err(&dc->pdev->dev, "No tty open?\n"); rval = -ENODEV; goto exit_in_buffer; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 4d64a02612a4..dc073e167abc 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -138,20 +138,15 @@ struct _input_signal_events { */ typedef struct _mgslpc_info { + struct tty_port port; void *if_ptr; /* General purpose pointer (used by SPPP) */ int magic; - int flags; - int count; /* count of opens */ int line; - unsigned short close_delay; - unsigned short closing_wait; /* time to wait before closing */ struct mgsl_icount icount; - struct tty_struct *tty; int timeout; int x_char; /* xon/xoff character */ - int blocked_open; /* # of blocked opens */ unsigned char read_status_mask; unsigned char ignore_status_mask; @@ -170,9 +165,6 @@ typedef struct _mgslpc_info { int rx_buf_count; /* total number of rx buffers */ int rx_frame_count; /* number of full rx buffers */ - wait_queue_head_t open_wait; - wait_queue_head_t close_wait; - wait_queue_head_t status_event_wait_q; wait_queue_head_t event_wait_q; struct timer_list tx_timer; /* HDLC transmit timeout timer */ @@ -375,7 +367,7 @@ static void irq_enable(MGSLPC_INFO *info, unsigned char channel, unsigned short static void rx_start(MGSLPC_INFO *info); static void rx_stop(MGSLPC_INFO *info); -static void tx_start(MGSLPC_INFO *info); +static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty); static void tx_stop(MGSLPC_INFO *info); static void tx_set_idle(MGSLPC_INFO *info); @@ -389,7 +381,8 @@ static void async_mode(MGSLPC_INFO *info); static void tx_timeout(unsigned long context); -static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg); +static int carrier_raised(struct tty_port *port); +static void raise_dtr_rts(struct tty_port *port); #if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) @@ -410,7 +403,7 @@ static void release_resources(MGSLPC_INFO *info); static void mgslpc_add_device(MGSLPC_INFO *info); static void mgslpc_remove_device(MGSLPC_INFO *info); -static bool rx_get_frame(MGSLPC_INFO *info); +static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty); static void rx_reset_buffers(MGSLPC_INFO *info); static int rx_alloc_buffers(MGSLPC_INFO *info); static void rx_free_buffers(MGSLPC_INFO *info); @@ -421,7 +414,7 @@ static irqreturn_t mgslpc_isr(int irq, void *dev_id); * Bottom half interrupt handlers */ static void bh_handler(struct work_struct *work); -static void bh_transmit(MGSLPC_INFO *info); +static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty); static void bh_status(MGSLPC_INFO *info); /* @@ -432,10 +425,10 @@ static int tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); static int get_stats(MGSLPC_INFO *info, struct mgsl_icount __user *user_icount); static int get_params(MGSLPC_INFO *info, MGSL_PARAMS __user *user_params); -static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params); +static int set_params(MGSLPC_INFO *info, MGSL_PARAMS __user *new_params, struct tty_struct *tty); static int get_txidle(MGSLPC_INFO *info, int __user *idle_mode); static int set_txidle(MGSLPC_INFO *info, int idle_mode); -static int set_txenable(MGSLPC_INFO *info, int enable); +static int set_txenable(MGSLPC_INFO *info, int enable, struct tty_struct *tty); static int tx_abort(MGSLPC_INFO *info); static int set_rxenable(MGSLPC_INFO *info, int enable); static int wait_events(MGSLPC_INFO *info, int __user *mask); @@ -474,7 +467,7 @@ static struct tty_driver *serial_driver; /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 -static void mgslpc_change_params(MGSLPC_INFO *info); +static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty); static void mgslpc_wait_until_sent(struct tty_struct *tty, int timeout); /* PCMCIA prototypes */ @@ -517,6 +510,11 @@ static void ldisc_receive_buf(struct tty_struct *tty, } } +static const struct tty_port_operations mgslpc_port_ops = { + .carrier_raised = carrier_raised, + .raise_dtr_rts = raise_dtr_rts +}; + static int mgslpc_probe(struct pcmcia_device *link) { MGSLPC_INFO *info; @@ -532,12 +530,12 @@ static int mgslpc_probe(struct pcmcia_device *link) } info->magic = MGSLPC_MAGIC; + tty_port_init(&info->port); + info->port.ops = &mgslpc_port_ops; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; - info->close_delay = 5*HZ/10; - info->closing_wait = 30*HZ; - init_waitqueue_head(&info->open_wait); - init_waitqueue_head(&info->close_wait); + info->port.close_delay = 5*HZ/10; + info->port.closing_wait = 30*HZ; init_waitqueue_head(&info->status_event_wait_q); init_waitqueue_head(&info->event_wait_q); spin_lock_init(&info->lock); @@ -784,7 +782,7 @@ static void tx_release(struct tty_struct *tty) spin_lock_irqsave(&info->lock,flags); if (!info->tx_enabled) - tx_start(info); + tx_start(info, tty); spin_unlock_irqrestore(&info->lock,flags); } @@ -823,6 +821,7 @@ static int bh_action(MGSLPC_INFO *info) static void bh_handler(struct work_struct *work) { MGSLPC_INFO *info = container_of(work, MGSLPC_INFO, task); + struct tty_struct *tty; int action; if (!info) @@ -833,6 +832,7 @@ static void bh_handler(struct work_struct *work) __FILE__,__LINE__,info->device_name); info->bh_running = true; + tty = tty_port_tty_get(&info->port); while((action = bh_action(info)) != 0) { @@ -844,10 +844,10 @@ static void bh_handler(struct work_struct *work) switch (action) { case BH_RECEIVE: - while(rx_get_frame(info)); + while(rx_get_frame(info, tty)); break; case BH_TRANSMIT: - bh_transmit(info); + bh_transmit(info, tty); break; case BH_STATUS: bh_status(info); @@ -859,14 +859,14 @@ static void bh_handler(struct work_struct *work) } } + tty_kref_put(tty); if (debug_level >= DEBUG_LEVEL_BH) printk( "%s(%d):bh_handler(%s) exit\n", __FILE__,__LINE__,info->device_name); } -static void bh_transmit(MGSLPC_INFO *info) +static void bh_transmit(MGSLPC_INFO *info, struct tty_struct *tty) { - struct tty_struct *tty = info->tty; if (debug_level >= DEBUG_LEVEL_BH) printk("bh_transmit() entry on %s\n", info->device_name); @@ -945,12 +945,11 @@ static void rx_ready_hdlc(MGSLPC_INFO *info, int eom) issue_command(info, CHA, CMD_RXFIFO); } -static void rx_ready_async(MGSLPC_INFO *info, int tcd) +static void rx_ready_async(MGSLPC_INFO *info, int tcd, struct tty_struct *tty) { unsigned char data, status, flag; int fifo_count; int work = 0; - struct tty_struct *tty = info->tty; struct mgsl_icount *icount = &info->icount; if (tcd) { @@ -1013,7 +1012,7 @@ static void rx_ready_async(MGSLPC_INFO *info, int tcd) } -static void tx_done(MGSLPC_INFO *info) +static void tx_done(MGSLPC_INFO *info, struct tty_struct *tty) { if (!info->tx_active) return; @@ -1042,7 +1041,7 @@ static void tx_done(MGSLPC_INFO *info) else #endif { - if (info->tty->stopped || info->tty->hw_stopped) { + if (tty->stopped || tty->hw_stopped) { tx_stop(info); return; } @@ -1050,7 +1049,7 @@ static void tx_done(MGSLPC_INFO *info) } } -static void tx_ready(MGSLPC_INFO *info) +static void tx_ready(MGSLPC_INFO *info, struct tty_struct *tty) { unsigned char fifo_count = 32; int c; @@ -1062,7 +1061,7 @@ static void tx_ready(MGSLPC_INFO *info) if (!info->tx_active) return; } else { - if (info->tty->stopped || info->tty->hw_stopped) { + if (tty->stopped || tty->hw_stopped) { tx_stop(info); return; } @@ -1099,7 +1098,7 @@ static void tx_ready(MGSLPC_INFO *info) } } -static void cts_change(MGSLPC_INFO *info) +static void cts_change(MGSLPC_INFO *info, struct tty_struct *tty) { get_signals(info); if ((info->cts_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT) @@ -1112,14 +1111,14 @@ static void cts_change(MGSLPC_INFO *info) wake_up_interruptible(&info->status_event_wait_q); wake_up_interruptible(&info->event_wait_q); - if (info->flags & ASYNC_CTS_FLOW) { - if (info->tty->hw_stopped) { + if (info->port.flags & ASYNC_CTS_FLOW) { + if (tty->hw_stopped) { if (info->serial_signals & SerialSignal_CTS) { if (debug_level >= DEBUG_LEVEL_ISR) printk("CTS tx start..."); - if (info->tty) - info->tty->hw_stopped = 0; - tx_start(info); + if (tty) + tty->hw_stopped = 0; + tx_start(info, tty); info->pending_bh |= BH_TRANSMIT; return; } @@ -1127,8 +1126,8 @@ static void cts_change(MGSLPC_INFO *info) if (!(info->serial_signals & SerialSignal_CTS)) { if (debug_level >= DEBUG_LEVEL_ISR) printk("CTS tx stop..."); - if (info->tty) - info->tty->hw_stopped = 1; + if (tty) + tty->hw_stopped = 1; tx_stop(info); } } @@ -1136,7 +1135,7 @@ static void cts_change(MGSLPC_INFO *info) info->pending_bh |= BH_STATUS; } -static void dcd_change(MGSLPC_INFO *info) +static void dcd_change(MGSLPC_INFO *info, struct tty_struct *tty) { get_signals(info); if ((info->dcd_chkcount)++ >= IO_PIN_SHUTDOWN_LIMIT) @@ -1158,17 +1157,17 @@ static void dcd_change(MGSLPC_INFO *info) wake_up_interruptible(&info->status_event_wait_q); wake_up_interruptible(&info->event_wait_q); - if (info->flags & ASYNC_CHECK_CD) { + if (info->port.flags & ASYNC_CHECK_CD) { if (debug_level >= DEBUG_LEVEL_ISR) printk("%s CD now %s...", info->device_name, (info->serial_signals & SerialSignal_DCD) ? "on" : "off"); if (info->serial_signals & SerialSignal_DCD) - wake_up_interruptible(&info->open_wait); + wake_up_interruptible(&info->port.open_wait); else { if (debug_level >= DEBUG_LEVEL_ISR) printk("doing serial hangup..."); - if (info->tty) - tty_hangup(info->tty); + if (tty) + tty_hangup(tty); } } info->pending_bh |= BH_STATUS; @@ -1214,6 +1213,7 @@ static void ri_change(MGSLPC_INFO *info) static irqreturn_t mgslpc_isr(int dummy, void *dev_id) { MGSLPC_INFO *info = dev_id; + struct tty_struct *tty; unsigned short isr; unsigned char gis, pis; int count=0; @@ -1224,6 +1224,8 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) if (!(info->p_dev->_locked)) return IRQ_HANDLED; + tty = tty_port_tty_get(&info->port); + spin_lock(&info->lock); while ((gis = read_reg(info, CHA + GIS))) { @@ -1239,9 +1241,9 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) if (gis & (BIT1 + BIT0)) { isr = read_reg16(info, CHB + ISR); if (isr & IRQ_DCD) - dcd_change(info); + dcd_change(info, tty); if (isr & IRQ_CTS) - cts_change(info); + cts_change(info, tty); } if (gis & (BIT3 + BIT2)) { @@ -1258,8 +1260,8 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) } if (isr & IRQ_BREAK_ON) { info->icount.brk++; - if (info->flags & ASYNC_SAK) - do_SAK(info->tty); + if (info->port.flags & ASYNC_SAK) + do_SAK(tty); } if (isr & IRQ_RXTIME) { issue_command(info, CHA, CMD_RXFIFO_READ); @@ -1268,7 +1270,7 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) if (info->params.mode == MGSL_MODE_HDLC) rx_ready_hdlc(info, isr & IRQ_RXEOM); else - rx_ready_async(info, isr & IRQ_RXEOM); + rx_ready_async(info, isr & IRQ_RXEOM, tty); } /* transmit IRQs */ @@ -1277,14 +1279,14 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) info->icount.txabort++; else info->icount.txunder++; - tx_done(info); + tx_done(info, tty); } else if (isr & IRQ_ALLSENT) { info->icount.txok++; - tx_done(info); + tx_done(info, tty); } else if (isr & IRQ_TXFIFO) - tx_ready(info); + tx_ready(info, tty); } if (gis & BIT7) { pis = read_reg(info, CHA + PIS); @@ -1308,6 +1310,7 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) } spin_unlock(&info->lock); + tty_kref_put(tty); if (debug_level >= DEBUG_LEVEL_ISR) printk("%s(%d):mgslpc_isr(%d)exit.\n", @@ -1318,14 +1321,14 @@ static irqreturn_t mgslpc_isr(int dummy, void *dev_id) /* Initialize and start device. */ -static int startup(MGSLPC_INFO * info) +static int startup(MGSLPC_INFO * info, struct tty_struct *tty) { int retval = 0; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):startup(%s)\n",__FILE__,__LINE__,info->device_name); - if (info->flags & ASYNC_INITIALIZED) + if (info->port.flags & ASYNC_INITIALIZED) return 0; if (!info->tx_buf) { @@ -1352,30 +1355,30 @@ static int startup(MGSLPC_INFO * info) retval = adapter_test(info); if ( retval ) { - if (capable(CAP_SYS_ADMIN) && info->tty) - set_bit(TTY_IO_ERROR, &info->tty->flags); + if (capable(CAP_SYS_ADMIN) && tty) + set_bit(TTY_IO_ERROR, &tty->flags); release_resources(info); return retval; } /* program hardware for current parameters */ - mgslpc_change_params(info); + mgslpc_change_params(info, tty); - if (info->tty) - clear_bit(TTY_IO_ERROR, &info->tty->flags); + if (tty) + clear_bit(TTY_IO_ERROR, &tty->flags); - info->flags |= ASYNC_INITIALIZED; + info->port.flags |= ASYNC_INITIALIZED; return 0; } /* Called by mgslpc_close() and mgslpc_hangup() to shutdown hardware */ -static void shutdown(MGSLPC_INFO * info) +static void shutdown(MGSLPC_INFO * info, struct tty_struct *tty) { unsigned long flags; - if (!(info->flags & ASYNC_INITIALIZED)) + if (!(info->port.flags & ASYNC_INITIALIZED)) return; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1402,7 +1405,7 @@ static void shutdown(MGSLPC_INFO * info) /* TODO:disable interrupts instead of reset to preserve signal states */ reset_device(info); - if (!info->tty || info->tty->termios->c_cflag & HUPCL) { + if (!tty || tty->termios->c_cflag & HUPCL) { info->serial_signals &= ~(SerialSignal_DTR + SerialSignal_RTS); set_signals(info); } @@ -1411,13 +1414,13 @@ static void shutdown(MGSLPC_INFO * info) release_resources(info); - if (info->tty) - set_bit(TTY_IO_ERROR, &info->tty->flags); + if (tty) + set_bit(TTY_IO_ERROR, &tty->flags); - info->flags &= ~ASYNC_INITIALIZED; + info->port.flags &= ~ASYNC_INITIALIZED; } -static void mgslpc_program_hw(MGSLPC_INFO *info) +static void mgslpc_program_hw(MGSLPC_INFO *info, struct tty_struct *tty) { unsigned long flags; @@ -1443,7 +1446,7 @@ static void mgslpc_program_hw(MGSLPC_INFO *info) port_irq_enable(info, (unsigned char) PVR_DSR | PVR_RI); get_signals(info); - if (info->netcount || info->tty->termios->c_cflag & CREAD) + if (info->netcount || (tty && (tty->termios->c_cflag & CREAD))) rx_start(info); spin_unlock_irqrestore(&info->lock,flags); @@ -1451,19 +1454,19 @@ static void mgslpc_program_hw(MGSLPC_INFO *info) /* Reconfigure adapter based on new parameters */ -static void mgslpc_change_params(MGSLPC_INFO *info) +static void mgslpc_change_params(MGSLPC_INFO *info, struct tty_struct *tty) { unsigned cflag; int bits_per_char; - if (!info->tty || !info->tty->termios) + if (!tty || !tty->termios) return; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_change_params(%s)\n", __FILE__,__LINE__, info->device_name ); - cflag = info->tty->termios->c_cflag; + cflag = tty->termios->c_cflag; /* if B0 rate (hangup) specified then negate DTR and RTS */ /* otherwise assert DTR and RTS */ @@ -1510,7 +1513,7 @@ static void mgslpc_change_params(MGSLPC_INFO *info) * current data rate. */ if (info->params.data_rate <= 460800) { - info->params.data_rate = tty_get_baud_rate(info->tty); + info->params.data_rate = tty_get_baud_rate(tty); } if ( info->params.data_rate ) { @@ -1520,24 +1523,24 @@ static void mgslpc_change_params(MGSLPC_INFO *info) info->timeout += HZ/50; /* Add .02 seconds of slop */ if (cflag & CRTSCTS) - info->flags |= ASYNC_CTS_FLOW; + info->port.flags |= ASYNC_CTS_FLOW; else - info->flags &= ~ASYNC_CTS_FLOW; + info->port.flags &= ~ASYNC_CTS_FLOW; if (cflag & CLOCAL) - info->flags &= ~ASYNC_CHECK_CD; + info->port.flags &= ~ASYNC_CHECK_CD; else - info->flags |= ASYNC_CHECK_CD; + info->port.flags |= ASYNC_CHECK_CD; /* process tty input control flags */ info->read_status_mask = 0; - if (I_INPCK(info->tty)) + if (I_INPCK(tty)) info->read_status_mask |= BIT7 | BIT6; - if (I_IGNPAR(info->tty)) + if (I_IGNPAR(tty)) info->ignore_status_mask |= BIT7 | BIT6; - mgslpc_program_hw(info); + mgslpc_program_hw(info, tty); } /* Add a character to the transmit buffer @@ -1597,7 +1600,7 @@ static void mgslpc_flush_chars(struct tty_struct *tty) spin_lock_irqsave(&info->lock,flags); if (!info->tx_active) - tx_start(info); + tx_start(info, tty); spin_unlock_irqrestore(&info->lock,flags); } @@ -1659,7 +1662,7 @@ start: if (info->tx_count && !tty->stopped && !tty->hw_stopped) { spin_lock_irqsave(&info->lock,flags); if (!info->tx_active) - tx_start(info); + tx_start(info, tty); spin_unlock_irqrestore(&info->lock,flags); } cleanup: @@ -1764,7 +1767,7 @@ static void mgslpc_send_xchar(struct tty_struct *tty, char ch) if (ch) { spin_lock_irqsave(&info->lock,flags); if (!info->tx_enabled) - tx_start(info); + tx_start(info, tty); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1862,7 +1865,7 @@ static int get_params(MGSLPC_INFO * info, MGSL_PARAMS __user *user_params) * * Returns: 0 if success, otherwise error code */ -static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params) +static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params, struct tty_struct *tty) { unsigned long flags; MGSL_PARAMS tmp_params; @@ -1883,7 +1886,7 @@ static int set_params(MGSLPC_INFO * info, MGSL_PARAMS __user *new_params) memcpy(&info->params,&tmp_params,sizeof(MGSL_PARAMS)); spin_unlock_irqrestore(&info->lock,flags); - mgslpc_change_params(info); + mgslpc_change_params(info, tty); return 0; } @@ -1944,7 +1947,7 @@ static int set_interface(MGSLPC_INFO * info, int if_mode) return 0; } -static int set_txenable(MGSLPC_INFO * info, int enable) +static int set_txenable(MGSLPC_INFO * info, int enable, struct tty_struct *tty) { unsigned long flags; @@ -1954,7 +1957,7 @@ static int set_txenable(MGSLPC_INFO * info, int enable) spin_lock_irqsave(&info->lock,flags); if (enable) { if (!info->tx_enabled) - tx_start(info); + tx_start(info, tty); } else { if (info->tx_enabled) tx_stop(info); @@ -2263,6 +2266,11 @@ static int mgslpc_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) { MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data; + int error; + struct mgsl_icount cnow; /* kernel counter temps */ + struct serial_icounter_struct __user *p_cuser; /* user space */ + void __user *argp = (void __user *)arg; + unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_ioctl %s cmd=%08X\n", __FILE__,__LINE__, @@ -2277,22 +2285,11 @@ static int mgslpc_ioctl(struct tty_struct *tty, struct file * file, return -EIO; } - return ioctl_common(info, cmd, arg); -} - -static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg) -{ - int error; - struct mgsl_icount cnow; /* kernel counter temps */ - struct serial_icounter_struct __user *p_cuser; /* user space */ - void __user *argp = (void __user *)arg; - unsigned long flags; - switch (cmd) { case MGSL_IOCGPARAMS: return get_params(info, argp); case MGSL_IOCSPARAMS: - return set_params(info, argp); + return set_params(info, argp, tty); case MGSL_IOCGTXIDLE: return get_txidle(info, argp); case MGSL_IOCSTXIDLE: @@ -2302,7 +2299,7 @@ static int ioctl_common(MGSLPC_INFO *info, unsigned int cmd, unsigned long arg) case MGSL_IOCSIF: return set_interface(info,(int)arg); case MGSL_IOCTXENABLE: - return set_txenable(info,(int)arg); + return set_txenable(info,(int)arg, tty); case MGSL_IOCRXENABLE: return set_rxenable(info,(int)arg); case MGSL_IOCTXABORT: @@ -2369,7 +2366,7 @@ static void mgslpc_set_termios(struct tty_struct *tty, struct ktermios *old_term == RELEVANT_IFLAG(old_termios->c_iflag))) return; - mgslpc_change_params(info); + mgslpc_change_params(info, tty); /* Handle transition to B0 status */ if (old_termios->c_cflag & CBAUD && @@ -2404,81 +2401,34 @@ static void mgslpc_set_termios(struct tty_struct *tty, struct ktermios *old_term static void mgslpc_close(struct tty_struct *tty, struct file * filp) { MGSLPC_INFO * info = (MGSLPC_INFO *)tty->driver_data; + struct tty_port *port = &info->port; if (mgslpc_paranoia_check(info, tty->name, "mgslpc_close")) return; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_close(%s) entry, count=%d\n", - __FILE__,__LINE__, info->device_name, info->count); - - if (!info->count) - return; + __FILE__,__LINE__, info->device_name, port->count); - if (tty_hung_up_p(filp)) - goto cleanup; - - if ((tty->count == 1) && (info->count != 1)) { - /* - * tty->count is 1 and the tty structure will be freed. - * info->count should be one in this case. - * if it's not, correct it so that the port is shutdown. - */ - printk("mgslpc_close: bad refcount; tty->count is 1, " - "info->count is %d\n", info->count); - info->count = 1; - } + WARN_ON(!port->count); - info->count--; - - /* if at least one open remaining, leave hardware active */ - if (info->count) + if (tty_port_close_start(port, tty, filp) == 0) goto cleanup; - info->flags |= ASYNC_CLOSING; - - /* set tty->closing to notify line discipline to - * only process XON/XOFF characters. Only the N_TTY - * discipline appears to use this (ppp does not). - */ - tty->closing = 1; - - /* wait for transmit data to clear all layers */ - - if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) { - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):mgslpc_close(%s) calling tty_wait_until_sent\n", - __FILE__,__LINE__, info->device_name ); - tty_wait_until_sent(tty, info->closing_wait); - } - - if (info->flags & ASYNC_INITIALIZED) + if (port->flags & ASYNC_INITIALIZED) mgslpc_wait_until_sent(tty, info->timeout); mgslpc_flush_buffer(tty); tty_ldisc_flush(tty); - - shutdown(info); - - tty->closing = 0; - info->tty = NULL; - - if (info->blocked_open) { - if (info->close_delay) { - msleep_interruptible(jiffies_to_msecs(info->close_delay)); - } - wake_up_interruptible(&info->open_wait); - } - - info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - - wake_up_interruptible(&info->close_wait); - + shutdown(info, tty); + + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); cleanup: if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_close(%s) exit, count=%d\n", __FILE__,__LINE__, - tty->driver->name, info->count); + tty->driver->name, port->count); } /* Wait until the transmitter is empty. @@ -2498,7 +2448,7 @@ static void mgslpc_wait_until_sent(struct tty_struct *tty, int timeout) if (mgslpc_paranoia_check(info, tty->name, "mgslpc_wait_until_sent")) return; - if (!(info->flags & ASYNC_INITIALIZED)) + if (!(info->port.flags & ASYNC_INITIALIZED)) goto exit; orig_jiffies = jiffies; @@ -2559,120 +2509,40 @@ static void mgslpc_hangup(struct tty_struct *tty) return; mgslpc_flush_buffer(tty); - shutdown(info); - - info->count = 0; - info->flags &= ~ASYNC_NORMAL_ACTIVE; - info->tty = NULL; - - wake_up_interruptible(&info->open_wait); + shutdown(info, tty); + tty_port_hangup(&info->port); } -/* Block the current process until the specified port - * is ready to be opened. - */ -static int block_til_ready(struct tty_struct *tty, struct file *filp, - MGSLPC_INFO *info) +static int carrier_raised(struct tty_port *port) { - DECLARE_WAITQUEUE(wait, current); - int retval; - bool do_clocal = false; - bool extra_count = false; - unsigned long flags; - - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):block_til_ready on %s\n", - __FILE__,__LINE__, tty->driver->name ); - - if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ - /* nonblock mode is set or port is not enabled */ - /* just verify that callout device is not active */ - info->flags |= ASYNC_NORMAL_ACTIVE; - return 0; - } - - if (tty->termios->c_cflag & CLOCAL) - do_clocal = true; - - /* Wait for carrier detect and the line to become - * free (i.e., not in use by the callout). While we are in - * this loop, info->count is dropped by one, so that - * mgslpc_close() knows when to free things. We restore it upon - * exit, either normal or abnormal. - */ - - retval = 0; - add_wait_queue(&info->open_wait, &wait); - - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):block_til_ready before block on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->count ); - - spin_lock_irqsave(&info->lock, flags); - if (!tty_hung_up_p(filp)) { - extra_count = true; - info->count--; - } - spin_unlock_irqrestore(&info->lock, flags); - info->blocked_open++; - - while (1) { - if ((tty->termios->c_cflag & CBAUD)) { - spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; - set_signals(info); - spin_unlock_irqrestore(&info->lock,flags); - } - - set_current_state(TASK_INTERRUPTIBLE); - - if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){ - retval = (info->flags & ASYNC_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS; - break; - } - - spin_lock_irqsave(&info->lock,flags); - get_signals(info); - spin_unlock_irqrestore(&info->lock,flags); - - if (!(info->flags & ASYNC_CLOSING) && - (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) { - break; - } - - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):block_til_ready blocking on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->count ); - - schedule(); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&info->open_wait, &wait); + MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); + unsigned long flags; - if (extra_count) - info->count++; - info->blocked_open--; + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):block_til_ready after blocking on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->count ); + if (info->serial_signals & SerialSignal_DCD) + return 1; + return 0; +} - if (!retval) - info->flags |= ASYNC_NORMAL_ACTIVE; +static void raise_dtr_rts(struct tty_port *port) +{ + MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); + unsigned long flags; - return retval; + spin_lock_irqsave(&info->lock,flags); + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); } + static int mgslpc_open(struct tty_struct *tty, struct file * filp) { MGSLPC_INFO *info; + struct tty_port *port; int retval, line; unsigned long flags; @@ -2691,23 +2561,24 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp) if (mgslpc_paranoia_check(info, tty->name, "mgslpc_open")) return -ENODEV; + port = &info->port; tty->driver_data = info; - info->tty = tty; + tty_port_tty_set(port, tty); if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgslpc_open(%s), old ref count = %d\n", - __FILE__,__LINE__,tty->driver->name, info->count); + __FILE__,__LINE__,tty->driver->name, port->count); /* If port is closing, signal caller to try again */ - if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){ - if (info->flags & ASYNC_CLOSING) - interruptible_sleep_on(&info->close_wait); - retval = ((info->flags & ASYNC_HUP_NOTIFY) ? + if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING){ + if (port->flags & ASYNC_CLOSING) + interruptible_sleep_on(&port->close_wait); + retval = ((port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); goto cleanup; } - info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + tty->low_latency = (port->flags & ASYNC_LOW_LATENCY) ? 1 : 0; spin_lock_irqsave(&info->netlock, flags); if (info->netcount) { @@ -2715,17 +2586,19 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp) spin_unlock_irqrestore(&info->netlock, flags); goto cleanup; } - info->count++; + spin_lock(&port->lock); + port->count++; + spin_unlock(&port->lock); spin_unlock_irqrestore(&info->netlock, flags); - if (info->count == 1) { + if (port->count == 1) { /* 1st open on this device, init hardware */ - retval = startup(info); + retval = startup(info, tty); if (retval < 0) goto cleanup; } - retval = block_til_ready(tty, filp, info); + retval = tty_port_block_til_ready(&info->port, tty, filp); if (retval) { if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):block_til_ready(%s) returned %d\n", @@ -2739,13 +2612,6 @@ static int mgslpc_open(struct tty_struct *tty, struct file * filp) retval = 0; cleanup: - if (retval) { - if (tty->count == 1) - info->tty = NULL; /* tty layer will release tty struct */ - if(info->count) - info->count--; - } - return retval; } @@ -3500,7 +3366,7 @@ static void rx_start(MGSLPC_INFO *info) info->rx_enabled = true; } -static void tx_start(MGSLPC_INFO *info) +static void tx_start(MGSLPC_INFO *info, struct tty_struct *tty) { if (debug_level >= DEBUG_LEVEL_ISR) printk("%s(%d):tx_start(%s)\n", @@ -3524,11 +3390,11 @@ static void tx_start(MGSLPC_INFO *info) if (info->params.mode == MGSL_MODE_ASYNC) { if (!info->tx_active) { info->tx_active = true; - tx_ready(info); + tx_ready(info, tty); } } else { info->tx_active = true; - tx_ready(info); + tx_ready(info, tty); mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(5000)); } @@ -3849,13 +3715,12 @@ static void rx_reset_buffers(MGSLPC_INFO *info) * * Returns true if frame returned, otherwise false */ -static bool rx_get_frame(MGSLPC_INFO *info) +static bool rx_get_frame(MGSLPC_INFO *info, struct tty_struct *tty) { unsigned short status; RXBUF *buf; unsigned int framesize = 0; unsigned long flags; - struct tty_struct *tty = info->tty; bool return_frame = false; if (info->rx_frame_count == 0) @@ -4075,7 +3940,11 @@ static void tx_timeout(unsigned long context) hdlcdev_tx_done(info); else #endif - bh_transmit(info); + { + struct tty_struct *tty = tty_port_tty_get(&info->port); + bh_transmit(info, tty); + tty_kref_put(tty); + } } #if SYNCLINK_GENERIC_HDLC @@ -4094,11 +3963,12 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, unsigned short parity) { MGSLPC_INFO *info = dev_to_port(dev); + struct tty_struct *tty; unsigned char new_encoding; unsigned short new_crctype; /* return error if TTY interface open */ - if (info->count) + if (info->port.count) return -EBUSY; switch (encoding) @@ -4123,8 +3993,11 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, info->params.crc_type = new_crctype; /* if network interface up, reprogram hardware */ - if (info->netcount) - mgslpc_program_hw(info); + if (info->netcount) { + tty = tty_port_tty_get(&info->port); + mgslpc_program_hw(info, tty); + tty_kref_put(tty); + } return 0; } @@ -4165,8 +4038,11 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) /* start hardware transmitter if necessary */ spin_lock_irqsave(&info->lock,flags); - if (!info->tx_active) - tx_start(info); + if (!info->tx_active) { + struct tty_struct *tty = tty_port_tty_get(&info->port); + tx_start(info, tty); + tty_kref_put(tty); + } spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -4183,6 +4059,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) static int hdlcdev_open(struct net_device *dev) { MGSLPC_INFO *info = dev_to_port(dev); + struct tty_struct *tty; int rc; unsigned long flags; @@ -4195,7 +4072,7 @@ static int hdlcdev_open(struct net_device *dev) /* arbitrate between network and tty opens */ spin_lock_irqsave(&info->netlock, flags); - if (info->count != 0 || info->netcount != 0) { + if (info->port.count != 0 || info->netcount != 0) { printk(KERN_WARNING "%s: hdlc_open returning busy\n", dev->name); spin_unlock_irqrestore(&info->netlock, flags); return -EBUSY; @@ -4203,17 +4080,19 @@ static int hdlcdev_open(struct net_device *dev) info->netcount=1; spin_unlock_irqrestore(&info->netlock, flags); + tty = tty_port_tty_get(&info->port); /* claim resources and init adapter */ - if ((rc = startup(info)) != 0) { + if ((rc = startup(info, tty)) != 0) { + tty_kref_put(tty); spin_lock_irqsave(&info->netlock, flags); info->netcount=0; spin_unlock_irqrestore(&info->netlock, flags); return rc; } - /* assert DTR and RTS, apply hardware settings */ info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; - mgslpc_program_hw(info); + mgslpc_program_hw(info, tty); + tty_kref_put(tty); /* enable network layer transmit */ dev->trans_start = jiffies; @@ -4241,6 +4120,7 @@ static int hdlcdev_open(struct net_device *dev) static int hdlcdev_close(struct net_device *dev) { MGSLPC_INFO *info = dev_to_port(dev); + struct tty_struct *tty = tty_port_tty_get(&info->port); unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -4249,8 +4129,8 @@ static int hdlcdev_close(struct net_device *dev) netif_stop_queue(dev); /* shutdown adapter and release resources */ - shutdown(info); - + shutdown(info, tty); + tty_kref_put(tty); hdlc_close(dev); spin_lock_irqsave(&info->netlock, flags); @@ -4281,7 +4161,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) printk("%s:hdlcdev_ioctl(%s)\n",__FILE__,dev->name); /* return error if TTY interface open */ - if (info->count) + if (info->port.count) return -EBUSY; if (cmd != SIOCWANDEV) @@ -4354,8 +4234,11 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) info->params.clock_speed = 0; /* if network interface up, reprogram hardware */ - if (info->netcount) - mgslpc_program_hw(info); + if (info->netcount) { + struct tty_struct *tty = tty_port_tty_get(&info->port); + mgslpc_program_hw(info, tty); + tty_kref_put(tty); + } return 0; default: diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 6d4582712b1f..112a6ba9a96f 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -5,8 +5,6 @@ * * Added support for a Unix98-style ptmx device. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 - * Added TTY_DO_WRITE_WAKEUP to enable n_tty to send POLL_OUT to - * waiting writers -- Sapan Bhatia <sapan@corewars.org> * * When reading this code see also fs/devpts. In particular note that the * driver_data field is used by the devpts side as a binding to the devpts @@ -217,7 +215,6 @@ static int pty_open(struct tty_struct *tty, struct file *filp) clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); set_bit(TTY_THROTTLED, &tty->flags); - set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); retval = 0; out: return retval; @@ -230,6 +227,55 @@ static void pty_set_termios(struct tty_struct *tty, tty->termios->c_cflag |= (CS8 | CREAD); } +/** + * pty_do_resize - resize event + * @tty: tty being resized + * @real_tty: real tty (not the same as tty if using a pty/tty pair) + * @rows: rows (character) + * @cols: cols (character) + * + * Update the termios variables and send the neccessary signals to + * peform a terminal resize correctly + */ + +int pty_resize(struct tty_struct *tty, struct winsize *ws) +{ + struct pid *pgrp, *rpgrp; + unsigned long flags; + struct tty_struct *pty = tty->link; + + /* For a PTY we need to lock the tty side */ + mutex_lock(&tty->termios_mutex); + if (!memcmp(ws, &tty->winsize, sizeof(*ws))) + goto done; + + /* Get the PID values and reference them so we can + avoid holding the tty ctrl lock while sending signals. + We need to lock these individually however. */ + + spin_lock_irqsave(&tty->ctrl_lock, flags); + pgrp = get_pid(tty->pgrp); + spin_unlock_irqrestore(&tty->ctrl_lock, flags); + + spin_lock_irqsave(&pty->ctrl_lock, flags); + rpgrp = get_pid(pty->pgrp); + spin_unlock_irqrestore(&pty->ctrl_lock, flags); + + if (pgrp) + kill_pgrp(pgrp, SIGWINCH, 1); + if (rpgrp != pgrp && rpgrp) + kill_pgrp(rpgrp, SIGWINCH, 1); + + put_pid(pgrp); + put_pid(rpgrp); + + tty->winsize = *ws; + pty->winsize = *ws; /* Never used so will go away soon */ +done: + mutex_unlock(&tty->termios_mutex); + return 0; +} + static int pty_install(struct tty_driver *driver, struct tty_struct *tty) { struct tty_struct *o_tty; @@ -290,6 +336,7 @@ static const struct tty_operations pty_ops = { .chars_in_buffer = pty_chars_in_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, + .resize = pty_resize }; /* Traditional BSD devices */ @@ -319,6 +366,7 @@ static const struct tty_operations pty_ops_bsd = { .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .ioctl = pty_bsd_ioctl, + .resize = pty_resize }; static void __init legacy_pty_init(void) @@ -561,7 +609,8 @@ static const struct tty_operations ptm_unix98_ops = { .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .ioctl = pty_unix98_ioctl, - .shutdown = pty_unix98_shutdown + .shutdown = pty_unix98_shutdown, + .resize = pty_resize }; static const struct tty_operations pty_unix98_ops = { diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index a8f68a3f14dd..2e8a6eed34be 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -173,7 +173,7 @@ static void rio_disable_tx_interrupts(void *ptr); static void rio_enable_tx_interrupts(void *ptr); static void rio_disable_rx_interrupts(void *ptr); static void rio_enable_rx_interrupts(void *ptr); -static int rio_get_CD(void *ptr); +static int rio_carrier_raised(struct tty_port *port); static void rio_shutdown_port(void *ptr); static int rio_set_real_termios(void *ptr); static void rio_hungup(void *ptr); @@ -224,7 +224,6 @@ static struct real_driver rio_real_driver = { rio_enable_tx_interrupts, rio_disable_rx_interrupts, rio_enable_rx_interrupts, - rio_get_CD, rio_shutdown_port, rio_set_real_termios, rio_chars_in_buffer, @@ -476,9 +475,9 @@ static void rio_enable_rx_interrupts(void *ptr) /* Jeez. Isn't this simple? */ -static int rio_get_CD(void *ptr) +static int rio_carrier_raised(struct tty_port *port) { - struct Port *PortP = ptr; + struct Port *PortP = container_of(port, struct Port, gs.port); int rv; func_enter(); @@ -797,16 +796,9 @@ static int rio_init_drivers(void) return 1; } - -static void *ckmalloc(int size) -{ - void *p; - - p = kzalloc(size, GFP_KERNEL); - return p; -} - - +static const struct tty_port_operations rio_port_ops = { + .carrier_raised = rio_carrier_raised, +}; static int rio_init_datastructures(void) { @@ -826,33 +818,30 @@ static int rio_init_datastructures(void) #define TMIO_SZ sizeof(struct termios *) rio_dprintk(RIO_DEBUG_INIT, "getting : %Zd %Zd %Zd %Zd %Zd bytes\n", RI_SZ, RIO_HOSTS * HOST_SZ, RIO_PORTS * PORT_SZ, RIO_PORTS * TMIO_SZ, RIO_PORTS * TMIO_SZ); - if (!(p = ckmalloc(RI_SZ))) + if (!(p = kzalloc(RI_SZ, GFP_KERNEL))) goto free0; - if (!(p->RIOHosts = ckmalloc(RIO_HOSTS * HOST_SZ))) + if (!(p->RIOHosts = kzalloc(RIO_HOSTS * HOST_SZ, GFP_KERNEL))) goto free1; - if (!(p->RIOPortp = ckmalloc(RIO_PORTS * PORT_SZ))) + if (!(p->RIOPortp = kzalloc(RIO_PORTS * PORT_SZ, GFP_KERNEL))) goto free2; p->RIOConf = RIOConf; rio_dprintk(RIO_DEBUG_INIT, "Got : %p %p %p\n", p, p->RIOHosts, p->RIOPortp); #if 1 for (i = 0; i < RIO_PORTS; i++) { - port = p->RIOPortp[i] = ckmalloc(sizeof(struct Port)); + port = p->RIOPortp[i] = kzalloc(sizeof(struct Port), GFP_KERNEL); if (!port) { goto free6; } rio_dprintk(RIO_DEBUG_INIT, "initing port %d (%d)\n", i, port->Mapped); + tty_port_init(&port->gs.port); + port->gs.port.ops = &rio_port_ops; port->PortNum = i; port->gs.magic = RIO_MAGIC; port->gs.close_delay = HZ / 2; port->gs.closing_wait = 30 * HZ; port->gs.rd = &rio_real_driver; spin_lock_init(&port->portSem); - /* - * Initializing wait queue - */ - init_waitqueue_head(&port->gs.port.open_wait); - init_waitqueue_head(&port->gs.port.close_wait); } #else /* We could postpone initializing them to when they are configured. */ diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 2c6c8f33d6b4..9af8d74875bc 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -857,98 +857,21 @@ static void rc_shutdown_port(struct tty_struct *tty, rc_shutdown_board(bp); } -static int block_til_ready(struct tty_struct *tty, struct file *filp, - struct riscom_port *port) +static int carrier_raised(struct tty_port *port) { - DECLARE_WAITQUEUE(wait, current); - struct riscom_board *bp = port_Board(port); - int retval; - int do_clocal = 0; - int CD; + struct riscom_port *p = container_of(port, struct riscom_port, port); + struct riscom_board *bp = port_Board(p); unsigned long flags; - - /* - * If the device is in the middle of being closed, then block - * until it's done, and then try again. - */ - if (tty_hung_up_p(filp) || port->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&port->port.close_wait); - if (port->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - else - return -ERESTARTSYS; - } - - /* - * If non-blocking mode is set, or the port is not enabled, - * then make the check up front and then exit. - */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; - } - - if (C_CLOCAL(tty)) - do_clocal = 1; - - /* - * Block waiting for the carrier detect and the line to become - * free (i.e., not in use by the callout). While we are in - * this loop, info->count is dropped by one, so that - * rs_close() knows when to free things. We restore it upon - * exit, either normal or abnormal. - */ - retval = 0; - add_wait_queue(&port->port.open_wait, &wait); - + int CD; + spin_lock_irqsave(&riscom_lock, flags); - - if (!tty_hung_up_p(filp)) - port->port.count--; - + rc_out(bp, CD180_CAR, port_No(p)); + CD = rc_in(bp, CD180_MSVR) & MSVR_CD; + rc_out(bp, CD180_MSVR, MSVR_RTS); + bp->DTR &= ~(1u << port_No(p)); + rc_out(bp, RC_DTR, bp->DTR); spin_unlock_irqrestore(&riscom_lock, flags); - - port->port.blocked_open++; - while (1) { - spin_lock_irqsave(&riscom_lock, flags); - - rc_out(bp, CD180_CAR, port_No(port)); - CD = rc_in(bp, CD180_MSVR) & MSVR_CD; - rc_out(bp, CD180_MSVR, MSVR_RTS); - bp->DTR &= ~(1u << port_No(port)); - rc_out(bp, RC_DTR, bp->DTR); - - spin_unlock_irqrestore(&riscom_lock, flags); - - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || - !(port->port.flags & ASYNC_INITIALIZED)) { - if (port->port.flags & ASYNC_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - break; - } - if (!(port->port.flags & ASYNC_CLOSING) && - (do_clocal || CD)) - break; - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - schedule(); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&port->port.open_wait, &wait); - if (!tty_hung_up_p(filp)) - port->port.count++; - port->port.blocked_open--; - if (retval) - return retval; - - port->port.flags |= ASYNC_NORMAL_ACTIVE; - return 0; + return CD; } static int rc_open(struct tty_struct *tty, struct file *filp) @@ -977,13 +900,13 @@ static int rc_open(struct tty_struct *tty, struct file *filp) error = rc_setup_port(bp, port); if (error == 0) - error = block_til_ready(tty, filp, port); + error = tty_port_block_til_ready(&port->port, tty, filp); return error; } static void rc_flush_buffer(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; unsigned long flags; if (rc_paranoia_check(port, tty->name, "rc_flush_buffer")) @@ -998,7 +921,7 @@ static void rc_flush_buffer(struct tty_struct *tty) static void rc_close(struct tty_struct *tty, struct file *filp) { - struct riscom_port *port = (struct riscom_port *) tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned long flags; unsigned long timeout; @@ -1006,40 +929,19 @@ static void rc_close(struct tty_struct *tty, struct file *filp) if (!port || rc_paranoia_check(port, tty->name, "close")) return; - spin_lock_irqsave(&riscom_lock, flags); - - if (tty_hung_up_p(filp)) - goto out; - bp = port_Board(port); - if ((tty->count == 1) && (port->port.count != 1)) { - printk(KERN_INFO "rc%d: rc_close: bad port count;" - " tty->count is 1, port count is %d\n", - board_No(bp), port->port.count); - port->port.count = 1; - } - if (--port->port.count < 0) { - printk(KERN_INFO "rc%d: rc_close: bad port count " - "for tty%d: %d\n", - board_No(bp), port_No(port), port->port.count); - port->port.count = 0; - } - if (port->port.count) - goto out; - port->port.flags |= ASYNC_CLOSING; - /* - * Now we wait for the transmit buffer to clear; and we notify - * the line discipline to only process XON/XOFF characters. - */ - tty->closing = 1; - if (port->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, port->port.closing_wait); + + if (tty_port_close_start(&port->port, tty, filp) == 0) + return; + /* * At this point we stop accepting input. To do this, we * disable the receive line status interrupts, and tell the * interrupt driver to stop checking the data ready bit in the * line status register. */ + + spin_lock_irqsave(&riscom_lock, flags); port->IER &= ~IER_RXD; if (port->port.flags & ASYNC_INITIALIZED) { port->IER &= ~IER_TXRDY; @@ -1053,33 +955,24 @@ static void rc_close(struct tty_struct *tty, struct file *filp) */ timeout = jiffies + HZ; while (port->IER & IER_TXEMPTY) { + spin_unlock_irqrestore(&riscom_lock, flags); msleep_interruptible(jiffies_to_msecs(port->timeout)); + spin_lock_irqsave(&riscom_lock, flags); if (time_after(jiffies, timeout)) break; } } rc_shutdown_port(tty, bp, port); rc_flush_buffer(tty); - tty_ldisc_flush(tty); - - tty->closing = 0; - port->port.tty = NULL; - if (port->port.blocked_open) { - if (port->port.close_delay) - msleep_interruptible(jiffies_to_msecs(port->port.close_delay)); - wake_up_interruptible(&port->port.open_wait); - } - port->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&port->port.close_wait); - -out: spin_unlock_irqrestore(&riscom_lock, flags); + + tty_port_close_end(&port->port, tty); } static int rc_write(struct tty_struct *tty, const unsigned char *buf, int count) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; int c, total = 0; unsigned long flags; @@ -1122,7 +1015,7 @@ static int rc_write(struct tty_struct *tty, static int rc_put_char(struct tty_struct *tty, unsigned char ch) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; unsigned long flags; int ret = 0; @@ -1146,7 +1039,7 @@ out: static void rc_flush_chars(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; unsigned long flags; if (rc_paranoia_check(port, tty->name, "rc_flush_chars")) @@ -1166,7 +1059,7 @@ static void rc_flush_chars(struct tty_struct *tty) static int rc_write_room(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; int ret; if (rc_paranoia_check(port, tty->name, "rc_write_room")) @@ -1180,7 +1073,7 @@ static int rc_write_room(struct tty_struct *tty) static int rc_chars_in_buffer(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; if (rc_paranoia_check(port, tty->name, "rc_chars_in_buffer")) return 0; @@ -1190,7 +1083,7 @@ static int rc_chars_in_buffer(struct tty_struct *tty) static int rc_tiocmget(struct tty_struct *tty, struct file *file) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned char status; unsigned int result; @@ -1220,7 +1113,7 @@ static int rc_tiocmget(struct tty_struct *tty, struct file *file) static int rc_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; unsigned long flags; struct riscom_board *bp; @@ -1252,7 +1145,7 @@ static int rc_tiocmset(struct tty_struct *tty, struct file *file, static int rc_send_break(struct tty_struct *tty, int length) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp = port_Board(port); unsigned long flags; @@ -1345,7 +1238,7 @@ static int rc_get_serial_info(struct riscom_port *port, static int rc_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; void __user *argp = (void __user *)arg; int retval; @@ -1371,7 +1264,7 @@ static int rc_ioctl(struct tty_struct *tty, struct file *filp, static void rc_throttle(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned long flags; @@ -1393,7 +1286,7 @@ static void rc_throttle(struct tty_struct *tty) static void rc_unthrottle(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned long flags; @@ -1415,7 +1308,7 @@ static void rc_unthrottle(struct tty_struct *tty) static void rc_stop(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned long flags; @@ -1433,7 +1326,7 @@ static void rc_stop(struct tty_struct *tty) static void rc_start(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; unsigned long flags; @@ -1454,8 +1347,9 @@ static void rc_start(struct tty_struct *tty) static void rc_hangup(struct tty_struct *tty) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; struct riscom_board *bp; + unsigned long flags; if (rc_paranoia_check(port, tty->name, "rc_hangup")) return; @@ -1463,16 +1357,18 @@ static void rc_hangup(struct tty_struct *tty) bp = port_Board(port); rc_shutdown_port(tty, bp, port); + spin_lock_irqsave(&port->port.lock, flags); port->port.count = 0; port->port.flags &= ~ASYNC_NORMAL_ACTIVE; port->port.tty = NULL; wake_up_interruptible(&port->port.open_wait); + spin_unlock_irqrestore(&port->port.lock, flags); } static void rc_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct riscom_port *port = (struct riscom_port *)tty->driver_data; + struct riscom_port *port = tty->driver_data; unsigned long flags; if (rc_paranoia_check(port, tty->name, "rc_set_termios")) @@ -1510,6 +1406,11 @@ static const struct tty_operations riscom_ops = { .break_ctl = rc_send_break, }; +static const struct tty_port_operations riscom_port_ops = { + .carrier_raised = carrier_raised, +}; + + static int __init rc_init_drivers(void) { int error; @@ -1541,6 +1442,7 @@ static int __init rc_init_drivers(void) memset(rc_port, 0, sizeof(rc_port)); for (i = 0; i < RC_NPORT * RC_NBOARD; i++) { tty_port_init(&rc_port[i].port); + rc_port[i].port.ops = &riscom_port_ops; rc_port[i].magic = RISCOM8_MAGIC; } return 0; diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 584d791e84a6..f59fc5cea067 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -135,6 +135,7 @@ static int rcktpt_type[NUM_BOARDS]; static int is_PCI[NUM_BOARDS]; static rocketModel_t rocketModel[NUM_BOARDS]; static int max_board; +static const struct tty_port_operations rocket_port_ops; /* * The following arrays define the interrupt bits corresponding to each AIOP. @@ -435,15 +436,15 @@ static void rp_do_transmit(struct r_port *info) #endif if (!info) return; - if (!info->port.tty) { - printk(KERN_WARNING "rp: WARNING %s called with " - "info->port.tty==NULL\n", __func__); + tty = tty_port_tty_get(&info->port); + + if (tty == NULL) { + printk(KERN_WARNING "rp: WARNING %s called with tty==NULL\n", __func__); clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]); return; } spin_lock_irqsave(&info->slock, flags); - tty = info->port.tty; info->xmit_fifo_room = TXFIFO_SIZE - sGetTxCnt(cp); /* Loop sending data to FIFO until done or FIFO full */ @@ -477,6 +478,7 @@ static void rp_do_transmit(struct r_port *info) } spin_unlock_irqrestore(&info->slock, flags); + tty_kref_put(tty); #ifdef ROCKET_DEBUG_INTR printk(KERN_DEBUG "(%d,%d,%d,%d)...\n", info->xmit_cnt, info->xmit_head, @@ -498,18 +500,18 @@ static void rp_handle_port(struct r_port *info) if (!info) return; - if ((info->flags & ROCKET_INITIALIZED) == 0) { + if ((info->port.flags & ASYNC_INITIALIZED) == 0) { printk(KERN_WARNING "rp: WARNING: rp_handle_port called with " "info->flags & NOT_INIT\n"); return; } - if (!info->port.tty) { + tty = tty_port_tty_get(&info->port); + if (!tty) { printk(KERN_WARNING "rp: WARNING: rp_handle_port called with " - "info->port.tty==NULL\n"); + "tty==NULL\n"); return; } cp = &info->channel; - tty = info->port.tty; IntMask = sGetChanIntID(cp) & info->intmask; #ifdef ROCKET_DEBUG_INTR @@ -541,6 +543,7 @@ static void rp_handle_port(struct r_port *info) printk(KERN_INFO "DSR change...\n"); } #endif + tty_kref_put(tty); } /* @@ -649,9 +652,8 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) info->board = board; info->aiop = aiop; info->chan = chan; - info->port.closing_wait = 3000; - info->port.close_delay = 50; - init_waitqueue_head(&info->port.open_wait); + tty_port_init(&info->port); + info->port.ops = &rocket_port_ops; init_completion(&info->close_wait); info->flags &= ~ROCKET_MODE_MASK; switch (pc104[board][line]) { @@ -710,7 +712,7 @@ static void init_r_port(int board, int aiop, int chan, struct pci_dev *pci_dev) * Configures a rocketport port according to its termio settings. Called from * user mode into the driver (exception handler). *info CD manipulation is spinlock protected. */ -static void configure_r_port(struct r_port *info, +static void configure_r_port(struct tty_struct *tty, struct r_port *info, struct ktermios *old_termios) { unsigned cflag; @@ -718,7 +720,7 @@ static void configure_r_port(struct r_port *info, unsigned rocketMode; int bits, baud, divisor; CHANNEL_t *cp; - struct ktermios *t = info->port.tty->termios; + struct ktermios *t = tty->termios; cp = &info->channel; cflag = t->c_cflag; @@ -751,7 +753,7 @@ static void configure_r_port(struct r_port *info, } /* baud rate */ - baud = tty_get_baud_rate(info->port.tty); + baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; divisor = ((rp_baud_base[info->board] + (baud >> 1)) / baud) - 1; @@ -769,7 +771,7 @@ static void configure_r_port(struct r_port *info, sSetBaud(cp, divisor); /* FIXME: Should really back compute a baud rate from the divisor */ - tty_encode_baud_rate(info->port.tty, baud, baud); + tty_encode_baud_rate(tty, baud, baud); if (cflag & CRTSCTS) { info->intmask |= DELTA_CTS; @@ -794,15 +796,15 @@ static void configure_r_port(struct r_port *info, * Handle software flow control in the board */ #ifdef ROCKET_SOFT_FLOW - if (I_IXON(info->port.tty)) { + if (I_IXON(tty)) { sEnTxSoftFlowCtl(cp); - if (I_IXANY(info->port.tty)) { + if (I_IXANY(tty)) { sEnIXANY(cp); } else { sDisIXANY(cp); } - sSetTxXONChar(cp, START_CHAR(info->port.tty)); - sSetTxXOFFChar(cp, STOP_CHAR(info->port.tty)); + sSetTxXONChar(cp, START_CHAR(tty)); + sSetTxXOFFChar(cp, STOP_CHAR(tty)); } else { sDisTxSoftFlowCtl(cp); sDisIXANY(cp); @@ -814,24 +816,24 @@ static void configure_r_port(struct r_port *info, * Set up ignore/read mask words */ info->read_status_mask = STMRCVROVRH | 0xFF; - if (I_INPCK(info->port.tty)) + if (I_INPCK(tty)) info->read_status_mask |= STMFRAMEH | STMPARITYH; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + if (I_BRKINT(tty) || I_PARMRK(tty)) info->read_status_mask |= STMBREAKH; /* * Characters to ignore */ info->ignore_status_mask = 0; - if (I_IGNPAR(info->port.tty)) + if (I_IGNPAR(tty)) info->ignore_status_mask |= STMFRAMEH | STMPARITYH; - if (I_IGNBRK(info->port.tty)) { + if (I_IGNBRK(tty)) { info->ignore_status_mask |= STMBREAKH; /* * If we're ignoring parity and break indicators, * ignore overruns too. (For real raw support). */ - if (I_IGNPAR(info->port.tty)) + if (I_IGNPAR(tty)) info->ignore_status_mask |= STMRCVROVRH; } @@ -864,106 +866,17 @@ static void configure_r_port(struct r_port *info, } } -/* info->port.count is considered critical, protected by spinlocks. */ -static int block_til_ready(struct tty_struct *tty, struct file *filp, - struct r_port *info) +static int carrier_raised(struct tty_port *port) { - DECLARE_WAITQUEUE(wait, current); - int retval; - int do_clocal = 0, extra_count = 0; - unsigned long flags; - - /* - * If the device is in the middle of being closed, then block - * until it's done, and then try again. - */ - if (tty_hung_up_p(filp)) - return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); - if (info->flags & ROCKET_CLOSING) { - if (wait_for_completion_interruptible(&info->close_wait)) - return -ERESTARTSYS; - return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); - } - - /* - * If non-blocking mode is set, or the port is not enabled, - * then make the check up front and then exit. - */ - if ((filp->f_flags & O_NONBLOCK) || (tty->flags & (1 << TTY_IO_ERROR))) { - info->flags |= ROCKET_NORMAL_ACTIVE; - return 0; - } - if (tty->termios->c_cflag & CLOCAL) - do_clocal = 1; - - /* - * Block waiting for the carrier detect and the line to become free. While we are in - * this loop, info->port.count is dropped by one, so that rp_close() knows when to free things. - * We restore it upon exit, either normal or abnormal. - */ - retval = 0; - add_wait_queue(&info->port.open_wait, &wait); -#ifdef ROCKET_DEBUG_OPEN - printk(KERN_INFO "block_til_ready before block: ttyR%d, count = %d\n", info->line, info->port.count); -#endif - spin_lock_irqsave(&info->slock, flags); - -#ifdef ROCKET_DISABLE_SIMUSAGE - info->flags |= ROCKET_NORMAL_ACTIVE; -#else - if (!tty_hung_up_p(filp)) { - extra_count = 1; - info->port.count--; - } -#endif - info->port.blocked_open++; - - spin_unlock_irqrestore(&info->slock, flags); - - while (1) { - if (tty->termios->c_cflag & CBAUD) { - sSetDTR(&info->channel); - sSetRTS(&info->channel); - } - set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(info->flags & ROCKET_INITIALIZED)) { - if (info->flags & ROCKET_HUP_NOTIFY) - retval = -EAGAIN; - else - retval = -ERESTARTSYS; - break; - } - if (!(info->flags & ROCKET_CLOSING) && (do_clocal || (sGetChanStatusLo(&info->channel) & CD_ACT))) - break; - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } -#ifdef ROCKET_DEBUG_OPEN - printk(KERN_INFO "block_til_ready blocking: ttyR%d, count = %d, flags=0x%0x\n", - info->line, info->port.count, info->flags); -#endif - schedule(); /* Don't hold spinlock here, will hang PC */ - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&info->port.open_wait, &wait); - - spin_lock_irqsave(&info->slock, flags); - - if (extra_count) - info->port.count++; - info->port.blocked_open--; - - spin_unlock_irqrestore(&info->slock, flags); + struct r_port *info = container_of(port, struct r_port, port); + return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0; +} -#ifdef ROCKET_DEBUG_OPEN - printk(KERN_INFO "block_til_ready after blocking: ttyR%d, count = %d\n", - info->line, info->port.count); -#endif - if (retval) - return retval; - info->flags |= ROCKET_NORMAL_ACTIVE; - return 0; +static void raise_dtr_rts(struct tty_port *port) +{ + struct r_port *info = container_of(port, struct r_port, port); + sSetDTR(&info->channel); + sSetRTS(&info->channel); } /* @@ -973,24 +886,26 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, static int rp_open(struct tty_struct *tty, struct file *filp) { struct r_port *info; + struct tty_port *port; int line = 0, retval; CHANNEL_t *cp; unsigned long page; line = tty->index; - if ((line < 0) || (line >= MAX_RP_PORTS) || ((info = rp_table[line]) == NULL)) + if (line < 0 || line >= MAX_RP_PORTS || ((info = rp_table[line]) == NULL)) return -ENXIO; - + port = &info->port; + page = __get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; - if (info->flags & ROCKET_CLOSING) { + if (port->flags & ASYNC_CLOSING) { retval = wait_for_completion_interruptible(&info->close_wait); free_page(page); if (retval) return retval; - return ((info->flags & ROCKET_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); + return ((port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS); } /* @@ -1002,9 +917,9 @@ static int rp_open(struct tty_struct *tty, struct file *filp) info->xmit_buf = (unsigned char *) page; tty->driver_data = info; - info->port.tty = tty; + tty_port_tty_set(port, tty); - if (info->port.count++ == 0) { + if (port->count++ == 0) { atomic_inc(&rp_num_ports_open); #ifdef ROCKET_DEBUG_OPEN @@ -1019,7 +934,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) /* * Info->count is now 1; so it's safe to sleep now. */ - if ((info->flags & ROCKET_INITIALIZED) == 0) { + if (!test_bit(ASYNC_INITIALIZED, &port->flags)) { cp = &info->channel; sSetRxTrigger(cp, TRIG_1); if (sGetChanStatus(cp) & CD_ACT) @@ -1043,21 +958,21 @@ static int rp_open(struct tty_struct *tty, struct file *filp) sEnRxFIFO(cp); sEnTransmit(cp); - info->flags |= ROCKET_INITIALIZED; + set_bit(ASYNC_INITIALIZED, &info->port.flags); /* * Set up the tty->alt_speed kludge */ if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI) - info->port.tty->alt_speed = 57600; + tty->alt_speed = 57600; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI) - info->port.tty->alt_speed = 115200; + tty->alt_speed = 115200; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI) - info->port.tty->alt_speed = 230400; + tty->alt_speed = 230400; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP) - info->port.tty->alt_speed = 460800; + tty->alt_speed = 460800; - configure_r_port(info, NULL); + configure_r_port(tty, info, NULL); if (tty->termios->c_cflag & CBAUD) { sSetDTR(cp); sSetRTS(cp); @@ -1066,7 +981,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) /* Starts (or resets) the maint polling loop */ mod_timer(&rocket_timer, jiffies + POLL_PERIOD); - retval = block_til_ready(tty, filp, info); + retval = tty_port_block_til_ready(port, tty, filp); if (retval) { #ifdef ROCKET_DEBUG_OPEN printk(KERN_INFO "rp_open returning after block_til_ready with %d\n", retval); @@ -1081,8 +996,8 @@ static int rp_open(struct tty_struct *tty, struct file *filp) */ static void rp_close(struct tty_struct *tty, struct file *filp) { - struct r_port *info = (struct r_port *) tty->driver_data; - unsigned long flags; + struct r_port *info = tty->driver_data; + struct tty_port *port = &info->port; int timeout; CHANNEL_t *cp; @@ -1093,53 +1008,10 @@ static void rp_close(struct tty_struct *tty, struct file *filp) printk(KERN_INFO "rp_close ttyR%d, count = %d\n", info->line, info->port.count); #endif - if (tty_hung_up_p(filp)) - return; - spin_lock_irqsave(&info->slock, flags); - - if ((tty->count == 1) && (info->port.count != 1)) { - /* - * Uh, oh. tty->count is 1, which means that the tty - * structure will be freed. Info->count should always - * be one in these conditions. If it's greater than - * one, we've got real problems, since it means the - * serial port won't be shutdown. - */ - printk(KERN_WARNING "rp_close: bad serial port count; " - "tty->count is 1, info->port.count is %d\n", info->port.count); - info->port.count = 1; - } - if (--info->port.count < 0) { - printk(KERN_WARNING "rp_close: bad serial port count for " - "ttyR%d: %d\n", info->line, info->port.count); - info->port.count = 0; - } - if (info->port.count) { - spin_unlock_irqrestore(&info->slock, flags); + if (tty_port_close_start(port, tty, filp) == 0) return; - } - info->flags |= ROCKET_CLOSING; - spin_unlock_irqrestore(&info->slock, flags); cp = &info->channel; - - /* - * Notify the line discpline to only process XON/XOFF characters - */ - tty->closing = 1; - - /* - * If transmission was throttled by the application request, - * just flush the xmit buffer. - */ - if (tty->flow_stopped) - rp_flush_buffer(tty); - - /* - * Wait for the transmit buffer to clear - */ - if (info->port.closing_wait != ROCKET_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, info->port.closing_wait); /* * Before we drop DTR, make sure the UART transmitter * has completely drained; this is especially @@ -1168,19 +1040,24 @@ static void rp_close(struct tty_struct *tty, struct file *filp) clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]); - if (info->port.blocked_open) { - if (info->port.close_delay) { - msleep_interruptible(jiffies_to_msecs(info->port.close_delay)); + /* We can't yet use tty_port_close_end as the buffer handling in this + driver is a bit different to the usual */ + + if (port->blocked_open) { + if (port->close_delay) { + msleep_interruptible(jiffies_to_msecs(port->close_delay)); } - wake_up_interruptible(&info->port.open_wait); + wake_up_interruptible(&port->open_wait); } else { if (info->xmit_buf) { free_page((unsigned long) info->xmit_buf); info->xmit_buf = NULL; } } - info->flags &= ~(ROCKET_INITIALIZED | ROCKET_CLOSING | ROCKET_NORMAL_ACTIVE); + info->port.flags &= ~(ASYNC_INITIALIZED | ASYNC_CLOSING | ASYNC_NORMAL_ACTIVE); tty->closing = 0; + tty_port_tty_set(port, NULL); + wake_up_interruptible(&port->close_wait); complete_all(&info->close_wait); atomic_dec(&rp_num_ports_open); @@ -1195,7 +1072,7 @@ static void rp_close(struct tty_struct *tty, struct file *filp) static void rp_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; unsigned cflag; @@ -1213,7 +1090,7 @@ static void rp_set_termios(struct tty_struct *tty, /* Or CMSPAR */ tty->termios->c_cflag &= ~CMSPAR; - configure_r_port(info, old_termios); + configure_r_port(tty, info, old_termios); cp = &info->channel; @@ -1238,7 +1115,7 @@ static void rp_set_termios(struct tty_struct *tty, static int rp_break(struct tty_struct *tty, int break_state) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; unsigned long flags; if (rocket_paranoia_check(info, "rp_break")) @@ -1284,7 +1161,7 @@ static int sGetChanRI(CHANNEL_T * ChP) */ static int rp_tiocmget(struct tty_struct *tty, struct file *file) { - struct r_port *info = (struct r_port *)tty->driver_data; + struct r_port *info = tty->driver_data; unsigned int control, result, ChanStatus; ChanStatus = sGetChanStatusLo(&info->channel); @@ -1305,7 +1182,7 @@ static int rp_tiocmget(struct tty_struct *tty, struct file *file) static int rp_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct r_port *info = (struct r_port *)tty->driver_data; + struct r_port *info = tty->driver_data; if (set & TIOCM_RTS) info->channel.TxControl[3] |= SET_RTS; @@ -1338,7 +1215,8 @@ static int get_config(struct r_port *info, struct rocket_config __user *retinfo) return 0; } -static int set_config(struct r_port *info, struct rocket_config __user *new_info) +static int set_config(struct tty_struct *tty, struct r_port *info, + struct rocket_config __user *new_info) { struct rocket_config new_serial; @@ -1350,7 +1228,7 @@ static int set_config(struct r_port *info, struct rocket_config __user *new_info if ((new_serial.flags & ~ROCKET_USR_MASK) != (info->flags & ~ROCKET_USR_MASK)) return -EPERM; info->flags = ((info->flags & ~ROCKET_USR_MASK) | (new_serial.flags & ROCKET_USR_MASK)); - configure_r_port(info, NULL); + configure_r_port(tty, info, NULL); return 0; } @@ -1359,15 +1237,15 @@ static int set_config(struct r_port *info, struct rocket_config __user *new_info info->port.closing_wait = new_serial.closing_wait; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_HI) - info->port.tty->alt_speed = 57600; + tty->alt_speed = 57600; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_VHI) - info->port.tty->alt_speed = 115200; + tty->alt_speed = 115200; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_SHI) - info->port.tty->alt_speed = 230400; + tty->alt_speed = 230400; if ((info->flags & ROCKET_SPD_MASK) == ROCKET_SPD_WARP) - info->port.tty->alt_speed = 460800; + tty->alt_speed = 460800; - configure_r_port(info, NULL); + configure_r_port(tty, info, NULL); return 0; } @@ -1434,7 +1312,7 @@ static int get_version(struct r_port *info, struct rocket_version __user *retver static int rp_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; void __user *argp = (void __user *)arg; int ret = 0; @@ -1452,7 +1330,7 @@ static int rp_ioctl(struct tty_struct *tty, struct file *file, ret = get_config(info, argp); break; case RCKP_SET_CONFIG: - ret = set_config(info, argp); + ret = set_config(tty, info, argp); break; case RCKP_GET_PORTS: ret = get_ports(info, argp); @@ -1472,7 +1350,7 @@ static int rp_ioctl(struct tty_struct *tty, struct file *file, static void rp_send_xchar(struct tty_struct *tty, char ch) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; if (rocket_paranoia_check(info, "rp_send_xchar")) @@ -1487,7 +1365,7 @@ static void rp_send_xchar(struct tty_struct *tty, char ch) static void rp_throttle(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; #ifdef ROCKET_DEBUG_THROTTLE @@ -1507,7 +1385,7 @@ static void rp_throttle(struct tty_struct *tty) static void rp_unthrottle(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; #ifdef ROCKET_DEBUG_THROTTLE printk(KERN_INFO "unthrottle %s: %d....\n", tty->name, @@ -1534,7 +1412,7 @@ static void rp_unthrottle(struct tty_struct *tty) */ static void rp_stop(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; #ifdef ROCKET_DEBUG_FLOW printk(KERN_INFO "stop %s: %d %d....\n", tty->name, @@ -1550,7 +1428,7 @@ static void rp_stop(struct tty_struct *tty) static void rp_start(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; #ifdef ROCKET_DEBUG_FLOW printk(KERN_INFO "start %s: %d %d....\n", tty->name, @@ -1570,7 +1448,7 @@ static void rp_start(struct tty_struct *tty) */ static void rp_wait_until_sent(struct tty_struct *tty, int timeout) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; unsigned long orig_jiffies; int check_time, exit_time; @@ -1627,7 +1505,7 @@ static void rp_wait_until_sent(struct tty_struct *tty, int timeout) static void rp_hangup(struct tty_struct *tty) { CHANNEL_t *cp; - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; if (rocket_paranoia_check(info, "rp_hangup")) return; @@ -1636,15 +1514,13 @@ static void rp_hangup(struct tty_struct *tty) printk(KERN_INFO "rp_hangup of ttyR%d...\n", info->line); #endif rp_flush_buffer(tty); - if (info->flags & ROCKET_CLOSING) + if (info->port.flags & ASYNC_CLOSING) return; if (info->port.count) atomic_dec(&rp_num_ports_open); clear_bit((info->aiop * 8) + info->chan, (void *) &xmit_flags[info->board]); - info->port.count = 0; - info->flags &= ~ROCKET_NORMAL_ACTIVE; - info->port.tty = NULL; + tty_port_hangup(&info->port); cp = &info->channel; sDisRxFIFO(cp); @@ -1653,7 +1529,7 @@ static void rp_hangup(struct tty_struct *tty) sDisCTSFlowCtl(cp); sDisTxSoftFlowCtl(cp); sClrTxXOFF(cp); - info->flags &= ~ROCKET_INITIALIZED; + info->port.flags &= ~ASYNC_INITIALIZED; wake_up_interruptible(&info->port.open_wait); } @@ -1667,7 +1543,7 @@ static void rp_hangup(struct tty_struct *tty) */ static int rp_put_char(struct tty_struct *tty, unsigned char ch) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; unsigned long flags; @@ -1714,7 +1590,7 @@ static int rp_put_char(struct tty_struct *tty, unsigned char ch) static int rp_write(struct tty_struct *tty, const unsigned char *buf, int count) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; const unsigned char *b; int c, retval = 0; @@ -1764,7 +1640,8 @@ static int rp_write(struct tty_struct *tty, /* Write remaining data into the port's xmit_buf */ while (1) { - if (!info->port.tty) /* Seemingly obligatory check... */ + /* Hung up ? */ + if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags)) goto end; c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1); c = min(c, XMIT_BUF_SIZE - info->xmit_head); @@ -1806,7 +1683,7 @@ end: */ static int rp_write_room(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; int ret; if (rocket_paranoia_check(info, "rp_write_room")) @@ -1827,7 +1704,7 @@ static int rp_write_room(struct tty_struct *tty) */ static int rp_chars_in_buffer(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; if (rocket_paranoia_check(info, "rp_chars_in_buffer")) @@ -1848,7 +1725,7 @@ static int rp_chars_in_buffer(struct tty_struct *tty) */ static void rp_flush_buffer(struct tty_struct *tty) { - struct r_port *info = (struct r_port *) tty->driver_data; + struct r_port *info = tty->driver_data; CHANNEL_t *cp; unsigned long flags; @@ -2371,6 +2248,11 @@ static const struct tty_operations rocket_ops = { .tiocmset = rp_tiocmset, }; +static const struct tty_port_operations rocket_port_ops = { + .carrier_raised = carrier_raised, + .raise_dtr_rts = raise_dtr_rts, +}; + /* * The module "startup" routine; it's run when the module is loaded. */ diff --git a/drivers/char/rocket.h b/drivers/char/rocket.h index a8b09195ebba..ec863f35f1a9 100644 --- a/drivers/char/rocket.h +++ b/drivers/char/rocket.h @@ -39,7 +39,7 @@ struct rocket_version { /* * Rocketport flags */ -#define ROCKET_CALLOUT_NOHUP 0x00000001 +/*#define ROCKET_CALLOUT_NOHUP 0x00000001 */ #define ROCKET_FORCE_CD 0x00000002 #define ROCKET_HUP_NOTIFY 0x00000004 #define ROCKET_SPLIT_TERMIOS 0x00000008 diff --git a/drivers/char/rocket_int.h b/drivers/char/rocket_int.h index 21f3ff53ba32..67e0f1e778a2 100644 --- a/drivers/char/rocket_int.h +++ b/drivers/char/rocket_int.h @@ -1162,11 +1162,6 @@ struct r_port { /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 -/* Internal flags used only by the rocketport driver */ -#define ROCKET_INITIALIZED 0x80000000 /* Port is active */ -#define ROCKET_CLOSING 0x40000000 /* Serial port is closing */ -#define ROCKET_NORMAL_ACTIVE 0x20000000 /* Normal port is active */ - /* * Assigned major numbers for the Comtrol Rocketport */ diff --git a/drivers/char/selection.c b/drivers/char/selection.c index 2978a49a172b..f29fbe9b8ed7 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -306,7 +306,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t */ int paste_selection(struct tty_struct *tty) { - struct vc_data *vc = (struct vc_data *)tty->driver_data; + struct vc_data *vc = tty->driver_data; int pasted = 0; unsigned int count; struct tty_ldisc *ld; diff --git a/drivers/char/ser_a2232.c b/drivers/char/ser_a2232.c index 7b0c35207d9b..33872a219df6 100644 --- a/drivers/char/ser_a2232.c +++ b/drivers/char/ser_a2232.c @@ -122,7 +122,7 @@ static void a2232_disable_tx_interrupts(void *ptr); static void a2232_enable_tx_interrupts(void *ptr); static void a2232_disable_rx_interrupts(void *ptr); static void a2232_enable_rx_interrupts(void *ptr); -static int a2232_get_CD(void *ptr); +static int a2232_carrier_raised(struct tty_port *port); static void a2232_shutdown_port(void *ptr); static int a2232_set_real_termios(void *ptr); static int a2232_chars_in_buffer(void *ptr); @@ -148,7 +148,6 @@ static struct real_driver a2232_real_driver = { a2232_enable_tx_interrupts, a2232_disable_rx_interrupts, a2232_enable_rx_interrupts, - a2232_get_CD, a2232_shutdown_port, a2232_set_real_termios, a2232_chars_in_buffer, @@ -260,9 +259,10 @@ static void a2232_enable_rx_interrupts(void *ptr) port->disable_rx = 0; } -static int a2232_get_CD(void *ptr) +static int a2232_carrier_raised(struct tty_port *port) { - return ((struct a2232_port *) ptr)->cd_status; + struct a2232_port *ap = container_of(port, struct a2232_port, gs.port); + return ap->cd_status; } static void a2232_shutdown_port(void *ptr) @@ -460,14 +460,14 @@ static void a2232_throttle(struct tty_struct *tty) if switched on. So the only thing we can do at this layer here is not taking any characters out of the A2232 buffer any more. */ - struct a2232_port *port = (struct a2232_port *) tty->driver_data; + struct a2232_port *port = tty->driver_data; port->throttle_input = -1; } static void a2232_unthrottle(struct tty_struct *tty) { /* Unthrottle: dual to "throttle()" above. */ - struct a2232_port *port = (struct a2232_port *) tty->driver_data; + struct a2232_port *port = tty->driver_data; port->throttle_input = 0; } @@ -638,6 +638,10 @@ int ch, err, n, p; return IRQ_HANDLED; } +static const struct tty_port_operations a2232_port_ops = { + .carrier_raised = a2232_carrier_raised, +}; + static void a2232_init_portstructs(void) { struct a2232_port *port; @@ -645,6 +649,8 @@ static void a2232_init_portstructs(void) for (i = 0; i < MAX_A2232_BOARDS*NUMLINES; i++) { port = a2232_ports + i; + tty_port_init(&port->gs.port); + port->gs.port.ops = &a2232_port_ops; port->which_a2232 = i/NUMLINES; port->which_port_on_a2232 = i%NUMLINES; port->disable_rx = port->throttle_input = port->cd_status = 0; @@ -652,11 +658,6 @@ static void a2232_init_portstructs(void) port->gs.close_delay = HZ/2; port->gs.closing_wait = 30 * HZ; port->gs.rd = &a2232_real_driver; -#ifdef NEW_WRITE_LOCKING - mutex_init(&(port->gs.port_write_mutex)); -#endif - init_waitqueue_head(&port->gs.port.open_wait); - init_waitqueue_head(&port->gs.port.close_wait); } } diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index a8f15e6be594..f1f24f0ee26f 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -315,7 +315,7 @@ u_short write_cy_cmd(volatile u_char * base_addr, u_char cmd) static void cy_stop(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR; int channel; unsigned long flags; @@ -337,7 +337,7 @@ static void cy_stop(struct tty_struct *tty) static void cy_start(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; volatile unsigned char *base_addr = (unsigned char *)BASE_ADDR; int channel; unsigned long flags; @@ -1062,7 +1062,7 @@ static void config_setup(struct cyclades_port *info) static int cy_put_char(struct tty_struct *tty, unsigned char ch) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_IO @@ -1090,7 +1090,7 @@ static int cy_put_char(struct tty_struct *tty, unsigned char ch) static void cy_flush_chars(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; volatile unsigned char *base_addr = (u_char *) BASE_ADDR; int channel; @@ -1122,7 +1122,7 @@ static void cy_flush_chars(struct tty_struct *tty) */ static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; int c, total = 0; @@ -1166,7 +1166,7 @@ static int cy_write(struct tty_struct *tty, const unsigned char *buf, int count) static int cy_write_room(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; int ret; #ifdef SERIAL_DEBUG_IO @@ -1183,7 +1183,7 @@ static int cy_write_room(struct tty_struct *tty) static int cy_chars_in_buffer(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; #ifdef SERIAL_DEBUG_IO printk("cy_chars_in_buffer %s %d\n", tty->name, info->xmit_cnt); /* */ @@ -1197,7 +1197,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty) static void cy_flush_buffer(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; #ifdef SERIAL_DEBUG_IO @@ -1218,7 +1218,7 @@ static void cy_flush_buffer(struct tty_struct *tty) */ static void cy_throttle(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; volatile unsigned char *base_addr = (u_char *) BASE_ADDR; int channel; @@ -1250,7 +1250,7 @@ static void cy_throttle(struct tty_struct *tty) static void cy_unthrottle(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; unsigned long flags; volatile unsigned char *base_addr = (u_char *) BASE_ADDR; int channel; @@ -1345,7 +1345,7 @@ check_and_exit: static int cy_tiocmget(struct tty_struct *tty, struct file *file) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; int channel; volatile unsigned char *base_addr = (u_char *) BASE_ADDR; unsigned long flags; @@ -1369,7 +1369,7 @@ static int cy_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; int channel; volatile unsigned char *base_addr = (u_char *) BASE_ADDR; unsigned long flags; @@ -1532,7 +1532,7 @@ cy_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { unsigned long val; - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; int ret_val = 0; void __user *argp = (void __user *)arg; @@ -1607,7 +1607,7 @@ cy_ioctl(struct tty_struct *tty, struct file *file, static void cy_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; #ifdef SERIAL_DEBUG_OTHER printk("cy_set_termios %s\n", tty->name); @@ -1631,7 +1631,7 @@ static void cy_set_termios(struct tty_struct *tty, struct ktermios *old_termios) static void cy_close(struct tty_struct *tty, struct file *filp) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; /* CP('C'); */ #ifdef SERIAL_DEBUG_OTHER @@ -1698,7 +1698,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp) */ void cy_hangup(struct tty_struct *tty) { - struct cyclades_port *info = (struct cyclades_port *)tty->driver_data; + struct cyclades_port *info = tty->driver_data; #ifdef SERIAL_DEBUG_OTHER printk("cy_hangup %s\n", tty->name); /* */ diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index a16b94f12eb2..3c67c3d83de9 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -1450,7 +1450,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp) static void sx_flush_buffer(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; unsigned long flags; struct specialix_board *bp; @@ -1472,7 +1472,7 @@ static void sx_flush_buffer(struct tty_struct *tty) static void sx_close(struct tty_struct *tty, struct file *filp) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; unsigned long timeout; @@ -1585,7 +1585,7 @@ static void sx_close(struct tty_struct *tty, struct file *filp) static int sx_write(struct tty_struct *tty, const unsigned char *buf, int count) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; int c, total = 0; unsigned long flags; @@ -1637,7 +1637,7 @@ static int sx_write(struct tty_struct *tty, static int sx_put_char(struct tty_struct *tty, unsigned char ch) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; unsigned long flags; struct specialix_board *bp; @@ -1676,7 +1676,7 @@ static int sx_put_char(struct tty_struct *tty, unsigned char ch) static void sx_flush_chars(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; unsigned long flags; struct specialix_board *bp = port_Board(port); @@ -1703,7 +1703,7 @@ static void sx_flush_chars(struct tty_struct *tty) static int sx_write_room(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; int ret; func_enter(); @@ -1724,7 +1724,7 @@ static int sx_write_room(struct tty_struct *tty) static int sx_chars_in_buffer(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; func_enter(); @@ -1738,7 +1738,7 @@ static int sx_chars_in_buffer(struct tty_struct *tty) static int sx_tiocmget(struct tty_struct *tty, struct file *file) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned char status; unsigned int result; @@ -1780,7 +1780,7 @@ static int sx_tiocmget(struct tty_struct *tty, struct file *file) static int sx_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; unsigned long flags; struct specialix_board *bp; @@ -1820,7 +1820,7 @@ static int sx_tiocmset(struct tty_struct *tty, struct file *file, static int sx_send_break(struct tty_struct *tty, int length) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp = port_Board(port); unsigned long flags; @@ -1931,7 +1931,7 @@ static int sx_get_serial_info(struct specialix_port *port, static int sx_ioctl(struct tty_struct *tty, struct file *filp, unsigned int cmd, unsigned long arg) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; void __user *argp = (void __user *)arg; func_enter(); @@ -1959,7 +1959,7 @@ static int sx_ioctl(struct tty_struct *tty, struct file *filp, static void sx_throttle(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; @@ -2004,7 +2004,7 @@ static void sx_throttle(struct tty_struct *tty) static void sx_unthrottle(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; @@ -2045,7 +2045,7 @@ static void sx_unthrottle(struct tty_struct *tty) static void sx_stop(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; @@ -2072,7 +2072,7 @@ static void sx_stop(struct tty_struct *tty) static void sx_start(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; @@ -2100,7 +2100,7 @@ static void sx_start(struct tty_struct *tty) static void sx_hangup(struct tty_struct *tty) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; struct specialix_board *bp; unsigned long flags; @@ -2135,7 +2135,7 @@ static void sx_hangup(struct tty_struct *tty) static void sx_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct specialix_port *port = (struct specialix_port *)tty->driver_data; + struct specialix_port *port = tty->driver_data; unsigned long flags; struct specialix_board *bp; diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 963b03fb29e5..e1e0dd89ac9a 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -130,6 +130,8 @@ static char stl_unwanted[SC26198_RXFIFOSIZE]; static DEFINE_MUTEX(stl_brdslock); static struct stlbrd *stl_brds[STL_MAXBRDS]; +static const struct tty_port_operations stl_port_ops; + /* * Per board state flags. Used with the state field of the board struct. * Not really much here! @@ -407,7 +409,6 @@ static int stl_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, uns static int stl_brdinit(struct stlbrd *brdp); static int stl_getportstats(struct tty_struct *tty, struct stlport *portp, comstats_t __user *cp); static int stl_clrportstats(struct stlport *portp, comstats_t __user *cp); -static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp, struct file *filp); /* * CD1400 uart specific handling functions. @@ -703,8 +704,9 @@ static int stl_open(struct tty_struct *tty, struct file *filp) { struct stlport *portp; struct stlbrd *brdp; + struct tty_port *port; unsigned int minordev, brdnr, panelnr; - int portnr, rc; + int portnr; pr_debug("stl_open(tty=%p,filp=%p): device=%s\n", tty, filp, tty->name); @@ -715,6 +717,7 @@ static int stl_open(struct tty_struct *tty, struct file *filp) brdp = stl_brds[brdnr]; if (brdp == NULL) return -ENODEV; + minordev = MINOR2PORT(minordev); for (portnr = -1, panelnr = 0; panelnr < STL_MAXPANELS; panelnr++) { if (brdp->panels[panelnr] == NULL) @@ -731,16 +734,17 @@ static int stl_open(struct tty_struct *tty, struct file *filp) portp = brdp->panels[panelnr]->ports[portnr]; if (portp == NULL) return -ENODEV; + port = &portp->port; /* * On the first open of the device setup the port hardware, and * initialize the per port data structure. */ - tty_port_tty_set(&portp->port, tty); + tty_port_tty_set(port, tty); tty->driver_data = portp; - portp->port.count++; + port->count++; - if ((portp->port.flags & ASYNC_INITIALIZED) == 0) { + if ((port->flags & ASYNC_INITIALIZED) == 0) { if (!portp->tx.buf) { portp->tx.buf = kmalloc(STL_TXBUFSIZE, GFP_KERNEL); if (!portp->tx.buf) @@ -754,91 +758,24 @@ static int stl_open(struct tty_struct *tty, struct file *filp) stl_enablerxtx(portp, 1, 1); stl_startrxtx(portp, 1, 0); clear_bit(TTY_IO_ERROR, &tty->flags); - portp->port.flags |= ASYNC_INITIALIZED; - } - -/* - * Check if this port is in the middle of closing. If so then wait - * until it is closed then return error status, based on flag settings. - * The sleep here does not need interrupt protection since the wakeup - * for it is done with the same context. - */ - if (portp->port.flags & ASYNC_CLOSING) { - interruptible_sleep_on(&portp->port.close_wait); - if (portp->port.flags & ASYNC_HUP_NOTIFY) - return -EAGAIN; - return -ERESTARTSYS; + port->flags |= ASYNC_INITIALIZED; } - -/* - * Based on type of open being done check if it can overlap with any - * previous opens still in effect. If we are a normal serial device - * then also we might have to wait for carrier. - */ - if (!(filp->f_flags & O_NONBLOCK)) - if ((rc = stl_waitcarrier(tty, portp, filp)) != 0) - return rc; - - portp->port.flags |= ASYNC_NORMAL_ACTIVE; - - return 0; + return tty_port_block_til_ready(port, tty, filp); } /*****************************************************************************/ -/* - * Possibly need to wait for carrier (DCD signal) to come high. Say - * maybe because if we are clocal then we don't need to wait... - */ - -static int stl_waitcarrier(struct tty_struct *tty, struct stlport *portp, - struct file *filp) +static int stl_carrier_raised(struct tty_port *port) { - unsigned long flags; - int rc, doclocal; - - pr_debug("stl_waitcarrier(portp=%p,filp=%p)\n", portp, filp); - - rc = 0; - doclocal = 0; - - spin_lock_irqsave(&stallion_lock, flags); - - if (tty->termios->c_cflag & CLOCAL) - doclocal++; - - portp->openwaitcnt++; - if (! tty_hung_up_p(filp)) - portp->port.count--; - - for (;;) { - /* Takes brd_lock internally */ - stl_setsignals(portp, 1, 1); - if (tty_hung_up_p(filp) || - ((portp->port.flags & ASYNC_INITIALIZED) == 0)) { - if (portp->port.flags & ASYNC_HUP_NOTIFY) - rc = -EBUSY; - else - rc = -ERESTARTSYS; - break; - } - if (((portp->port.flags & ASYNC_CLOSING) == 0) && - (doclocal || (portp->sigs & TIOCM_CD))) - break; - if (signal_pending(current)) { - rc = -ERESTARTSYS; - break; - } - /* FIXME */ - interruptible_sleep_on(&portp->port.open_wait); - } - - if (! tty_hung_up_p(filp)) - portp->port.count++; - portp->openwaitcnt--; - spin_unlock_irqrestore(&stallion_lock, flags); + struct stlport *portp = container_of(port, struct stlport, port); + return (portp->sigs & TIOCM_CD) ? 1 : 0; +} - return rc; +static void stl_raise_dtr_rts(struct tty_port *port) +{ + struct stlport *portp = container_of(port, struct stlport, port); + /* Takes brd_lock internally */ + stl_setsignals(portp, 1, 1); } /*****************************************************************************/ @@ -890,47 +827,29 @@ static void stl_waituntilsent(struct tty_struct *tty, int timeout) static void stl_close(struct tty_struct *tty, struct file *filp) { struct stlport *portp; + struct tty_port *port; unsigned long flags; pr_debug("stl_close(tty=%p,filp=%p)\n", tty, filp); portp = tty->driver_data; - if (portp == NULL) - return; + BUG_ON(portp == NULL); - spin_lock_irqsave(&stallion_lock, flags); - if (tty_hung_up_p(filp)) { - spin_unlock_irqrestore(&stallion_lock, flags); - return; - } - if ((tty->count == 1) && (portp->port.count != 1)) - portp->port.count = 1; - if (portp->port.count-- > 1) { - spin_unlock_irqrestore(&stallion_lock, flags); - return; - } - - portp->port.count = 0; - portp->port.flags |= ASYNC_CLOSING; + port = &portp->port; + if (tty_port_close_start(port, tty, filp) == 0) + return; /* * May want to wait for any data to drain before closing. The BUSY * flag keeps track of whether we are still sending or not - it is * very accurate for the cd1400, not quite so for the sc26198. * (The sc26198 has no "end-of-data" interrupt only empty FIFO) */ - tty->closing = 1; - - spin_unlock_irqrestore(&stallion_lock, flags); - - if (portp->closing_wait != ASYNC_CLOSING_WAIT_NONE) - tty_wait_until_sent(tty, portp->closing_wait); stl_waituntilsent(tty, (HZ / 2)); - - spin_lock_irqsave(&stallion_lock, flags); + spin_lock_irqsave(&port->lock, flags); portp->port.flags &= ~ASYNC_INITIALIZED; - spin_unlock_irqrestore(&stallion_lock, flags); + spin_unlock_irqrestore(&port->lock, flags); stl_disableintrs(portp); if (tty->termios->c_cflag & HUPCL) @@ -944,20 +863,9 @@ static void stl_close(struct tty_struct *tty, struct file *filp) portp->tx.head = NULL; portp->tx.tail = NULL; } - set_bit(TTY_IO_ERROR, &tty->flags); - tty_ldisc_flush(tty); - tty->closing = 0; - tty_port_tty_set(&portp->port, NULL); - - if (portp->openwaitcnt) { - if (portp->close_delay) - msleep_interruptible(jiffies_to_msecs(portp->close_delay)); - wake_up_interruptible(&portp->port.open_wait); - } - - portp->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - wake_up_interruptible(&portp->port.close_wait); + tty_port_close_end(port, tty); + tty_port_tty_set(port, NULL); } /*****************************************************************************/ @@ -1405,14 +1313,20 @@ static void stl_stop(struct tty_struct *tty) static void stl_hangup(struct tty_struct *tty) { struct stlport *portp; + struct tty_port *port; + unsigned long flags; pr_debug("stl_hangup(tty=%p)\n", tty); portp = tty->driver_data; if (portp == NULL) return; + port = &portp->port; + + spin_lock_irqsave(&port->lock, flags); + port->flags &= ~ASYNC_INITIALIZED; + spin_unlock_irqrestore(&port->lock, flags); - portp->port.flags &= ~ASYNC_INITIALIZED; stl_disableintrs(portp); if (tty->termios->c_cflag & HUPCL) stl_setsignals(portp, 0, 0); @@ -1426,10 +1340,7 @@ static void stl_hangup(struct tty_struct *tty) portp->tx.head = NULL; portp->tx.tail = NULL; } - tty_port_tty_set(&portp->port, NULL); - portp->port.flags &= ~ASYNC_NORMAL_ACTIVE; - portp->port.count = 0; - wake_up_interruptible(&portp->port.open_wait); + tty_port_hangup(port); } /*****************************************************************************/ @@ -1776,6 +1687,7 @@ static int __devinit stl_initports(struct stlbrd *brdp, struct stlpanel *panelp) break; } tty_port_init(&portp->port); + portp->port.ops = &stl_port_ops; portp->magic = STL_PORTMAGIC; portp->portnr = i; portp->brdnr = panelp->brdnr; @@ -2659,6 +2571,11 @@ static const struct tty_operations stl_ops = { .tiocmset = stl_tiocmset, }; +static const struct tty_port_operations stl_port_ops = { + .carrier_raised = stl_carrier_raised, + .raise_dtr_rts = stl_raise_dtr_rts, +}; + /*****************************************************************************/ /* CD1400 HARDWARE FUNCTIONS */ /*****************************************************************************/ diff --git a/drivers/char/sx.c b/drivers/char/sx.c index ba4e86281fbf..b60be7b0decf 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -279,7 +279,7 @@ static void sx_disable_tx_interrupts(void *ptr); static void sx_enable_tx_interrupts(void *ptr); static void sx_disable_rx_interrupts(void *ptr); static void sx_enable_rx_interrupts(void *ptr); -static int sx_get_CD(void *ptr); +static int sx_carrier_raised(struct tty_port *port); static void sx_shutdown_port(void *ptr); static int sx_set_real_termios(void *ptr); static void sx_close(void *ptr); @@ -360,7 +360,6 @@ static struct real_driver sx_real_driver = { sx_enable_tx_interrupts, sx_disable_rx_interrupts, sx_enable_rx_interrupts, - sx_get_CD, sx_shutdown_port, sx_set_real_termios, sx_chars_in_buffer, @@ -791,7 +790,7 @@ static int sx_getsignals(struct sx_port *port) sx_dprintk(SX_DEBUG_MODEMSIGNALS, "getsignals: %d/%d (%d/%d) " "%02x/%02x\n", (o_stat & OP_DTR) != 0, (o_stat & OP_RTS) != 0, - port->c_dcd, sx_get_CD(port), + port->c_dcd, tty_port_carrier_raised(&port->gs.port), sx_read_channel_byte(port, hi_ip), sx_read_channel_byte(port, hi_state)); @@ -1190,7 +1189,7 @@ static inline void sx_check_modem_signals(struct sx_port *port) hi_state = sx_read_channel_byte(port, hi_state); sx_dprintk(SX_DEBUG_MODEMSIGNALS, "Checking modem signals (%d/%d)\n", - port->c_dcd, sx_get_CD(port)); + port->c_dcd, tty_port_carrier_raised(&port->gs.port)); if (hi_state & ST_BREAK) { hi_state &= ~ST_BREAK; @@ -1202,11 +1201,11 @@ static inline void sx_check_modem_signals(struct sx_port *port) hi_state &= ~ST_DCD; sx_dprintk(SX_DEBUG_MODEMSIGNALS, "got a DCD change.\n"); sx_write_channel_byte(port, hi_state, hi_state); - c_dcd = sx_get_CD(port); + c_dcd = tty_port_carrier_raised(&port->gs.port); sx_dprintk(SX_DEBUG_MODEMSIGNALS, "DCD is now %d\n", c_dcd); if (c_dcd != port->c_dcd) { port->c_dcd = c_dcd; - if (sx_get_CD(port)) { + if (tty_port_carrier_raised(&port->gs.port)) { /* DCD went UP */ if ((sx_read_channel_byte(port, hi_hstat) != HS_IDLE_CLOSED) && @@ -1415,13 +1414,10 @@ static void sx_enable_rx_interrupts(void *ptr) } /* Jeez. Isn't this simple? */ -static int sx_get_CD(void *ptr) +static int sx_carrier_raised(struct tty_port *port) { - struct sx_port *port = ptr; - func_enter2(); - - func_exit(); - return ((sx_read_channel_byte(port, hi_ip) & IP_DCD) != 0); + struct sx_port *sp = container_of(port, struct sx_port, gs.port); + return ((sx_read_channel_byte(sp, hi_ip) & IP_DCD) != 0); } /* Jeez. Isn't this simple? */ @@ -1536,7 +1532,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp) } /* tty->low_latency = 1; */ - port->c_dcd = sx_get_CD(port); + port->c_dcd = sx_carrier_raised(&port->gs.port); sx_dprintk(SX_DEBUG_OPEN, "at open: cd=%d\n", port->c_dcd); func_exit(); @@ -1945,7 +1941,7 @@ static int sx_ioctl(struct tty_struct *tty, struct file *filp, static void sx_throttle(struct tty_struct *tty) { - struct sx_port *port = (struct sx_port *)tty->driver_data; + struct sx_port *port = tty->driver_data; func_enter2(); /* If the port is using any type of input flow @@ -1959,7 +1955,7 @@ static void sx_throttle(struct tty_struct *tty) static void sx_unthrottle(struct tty_struct *tty) { - struct sx_port *port = (struct sx_port *)tty->driver_data; + struct sx_port *port = tty->driver_data; func_enter2(); /* Always unthrottle even if flow control is not enabled on @@ -2354,6 +2350,10 @@ static const struct tty_operations sx_ops = { .tiocmset = sx_tiocmset, }; +static const struct tty_port_operations sx_port_ops = { + .carrier_raised = sx_carrier_raised, +}; + static int sx_init_drivers(void) { int error; @@ -2410,6 +2410,7 @@ static int sx_init_portstructs(int nboards, int nports) for (j = 0; j < boards[i].nports; j++) { sx_dprintk(SX_DEBUG_INIT, "initing port %d\n", j); tty_port_init(&port->gs.port); + port->gs.port.ops = &sx_port_ops; port->gs.magic = SX_MAGIC; port->gs.close_delay = HZ / 2; port->gs.closing_wait = 30 * HZ; diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 500f5176b6ba..b8063d4cad32 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -977,7 +977,7 @@ static void ldisc_receive_buf(struct tty_struct *tty, */ static void mgsl_stop(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (mgsl_paranoia_check(info, tty->name, "mgsl_stop")) @@ -1000,7 +1000,7 @@ static void mgsl_stop(struct tty_struct *tty) */ static void mgsl_start(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (mgsl_paranoia_check(info, tty->name, "mgsl_start")) @@ -2057,7 +2057,7 @@ static int mgsl_put_char(struct tty_struct *tty, unsigned char ch) */ static void mgsl_flush_chars(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if ( debug_level >= DEBUG_LEVEL_INFO ) @@ -2109,7 +2109,7 @@ static int mgsl_write(struct tty_struct * tty, const unsigned char *buf, int count) { int c, ret = 0; - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if ( debug_level >= DEBUG_LEVEL_INFO ) @@ -2232,7 +2232,7 @@ cleanup: */ static int mgsl_write_room(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; int ret; if (mgsl_paranoia_check(info, tty->name, "mgsl_write_room")) @@ -2267,7 +2267,7 @@ static int mgsl_write_room(struct tty_struct *tty) */ static int mgsl_chars_in_buffer(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_chars_in_buffer(%s)\n", @@ -2301,7 +2301,7 @@ static int mgsl_chars_in_buffer(struct tty_struct *tty) */ static void mgsl_flush_buffer(struct tty_struct *tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2329,7 +2329,7 @@ static void mgsl_flush_buffer(struct tty_struct *tty) */ static void mgsl_send_xchar(struct tty_struct *tty, char ch) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2358,7 +2358,7 @@ static void mgsl_send_xchar(struct tty_struct *tty, char ch) */ static void mgsl_throttle(struct tty_struct * tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2388,7 +2388,7 @@ static void mgsl_throttle(struct tty_struct * tty) */ static void mgsl_unthrottle(struct tty_struct * tty) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2841,7 +2841,7 @@ static int modem_input_wait(struct mgsl_struct *info,int arg) */ static int tiocmget(struct tty_struct *tty, struct file *file) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned int result; unsigned long flags; @@ -2867,7 +2867,7 @@ static int tiocmget(struct tty_struct *tty, struct file *file) static int tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2898,7 +2898,7 @@ static int tiocmset(struct tty_struct *tty, struct file *file, */ static int mgsl_break(struct tty_struct *tty, int break_state) { - struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct * info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -2932,7 +2932,7 @@ static int mgsl_break(struct tty_struct *tty, int break_state) static int mgsl_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) { - struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct * info = tty->driver_data; int ret; if (debug_level >= DEBUG_LEVEL_INFO) @@ -3042,7 +3042,7 @@ static int mgsl_ioctl_common(struct mgsl_struct *info, unsigned int cmd, unsigne */ static void mgsl_set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - struct mgsl_struct *info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -3096,7 +3096,7 @@ static void mgsl_set_termios(struct tty_struct *tty, struct ktermios *old_termio */ static void mgsl_close(struct tty_struct *tty, struct file * filp) { - struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct * info = tty->driver_data; if (mgsl_paranoia_check(info, tty->name, "mgsl_close")) return; @@ -3104,70 +3104,18 @@ static void mgsl_close(struct tty_struct *tty, struct file * filp) if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_close(%s) entry, count=%d\n", __FILE__,__LINE__, info->device_name, info->port.count); - - if (!info->port.count) - return; - if (tty_hung_up_p(filp)) + if (tty_port_close_start(&info->port, tty, filp) == 0) goto cleanup; - if ((tty->count == 1) && (info->port.count != 1)) { - /* - * tty->count is 1 and the tty structure will be freed. - * info->port.count should be one in this case. - * if it's not, correct it so that the port is shutdown. - */ - printk("mgsl_close: bad refcount; tty->count is 1, " - "info->port.count is %d\n", info->port.count); - info->port.count = 1; - } - - info->port.count--; - - /* if at least one open remaining, leave hardware active */ - if (info->port.count) - goto cleanup; - - info->port.flags |= ASYNC_CLOSING; - - /* set tty->closing to notify line discipline to - * only process XON/XOFF characters. Only the N_TTY - * discipline appears to use this (ppp does not). - */ - tty->closing = 1; - - /* wait for transmit data to clear all layers */ - - if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) { - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):mgsl_close(%s) calling tty_wait_until_sent\n", - __FILE__,__LINE__, info->device_name ); - tty_wait_until_sent(tty, info->port.closing_wait); - } - if (info->port.flags & ASYNC_INITIALIZED) mgsl_wait_until_sent(tty, info->timeout); - mgsl_flush_buffer(tty); - tty_ldisc_flush(tty); - shutdown(info); - - tty->closing = 0; + + tty_port_close_end(&info->port, tty); info->port.tty = NULL; - - if (info->port.blocked_open) { - if (info->port.close_delay) { - msleep_interruptible(jiffies_to_msecs(info->port.close_delay)); - } - wake_up_interruptible(&info->port.open_wait); - } - - info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - - wake_up_interruptible(&info->port.close_wait); - cleanup: if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_close(%s) exit, count=%d\n", __FILE__,__LINE__, @@ -3188,7 +3136,7 @@ cleanup: */ static void mgsl_wait_until_sent(struct tty_struct *tty, int timeout) { - struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct * info = tty->driver_data; unsigned long orig_jiffies, char_time; if (!info ) @@ -3261,7 +3209,7 @@ exit: */ static void mgsl_hangup(struct tty_struct *tty) { - struct mgsl_struct * info = (struct mgsl_struct *)tty->driver_data; + struct mgsl_struct * info = tty->driver_data; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):mgsl_hangup(%s)\n", @@ -3281,6 +3229,35 @@ static void mgsl_hangup(struct tty_struct *tty) } /* end of mgsl_hangup() */ +/* + * carrier_raised() + * + * Return true if carrier is raised + */ + +static int carrier_raised(struct tty_port *port) +{ + unsigned long flags; + struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); + + spin_lock_irqsave(&info->irq_spinlock, flags); + usc_get_serial_signals(info); + spin_unlock_irqrestore(&info->irq_spinlock, flags); + return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; +} + +static void raise_dtr_rts(struct tty_port *port) +{ + struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); + unsigned long flags; + + spin_lock_irqsave(&info->irq_spinlock,flags); + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + usc_set_serial_signals(info); + spin_unlock_irqrestore(&info->irq_spinlock,flags); +} + + /* block_til_ready() * * Block the current process until the specified port @@ -3302,6 +3279,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, bool do_clocal = false; bool extra_count = false; unsigned long flags; + int dcd; + struct tty_port *port = &info->port; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):block_til_ready on %s\n", @@ -3309,7 +3288,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -3318,50 +3297,42 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, info->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * mgsl_close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&info->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):block_til_ready before block on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); spin_lock_irqsave(&info->irq_spinlock, flags); if (!tty_hung_up_p(filp)) { extra_count = true; - info->port.count--; + port->count--; } spin_unlock_irqrestore(&info->irq_spinlock, flags); - info->port.blocked_open++; + port->blocked_open++; while (1) { - if (tty->termios->c_cflag & CBAUD) { - spin_lock_irqsave(&info->irq_spinlock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; - usc_set_serial_signals(info); - spin_unlock_irqrestore(&info->irq_spinlock,flags); - } + if (tty->termios->c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){ - retval = (info->port.flags & ASYNC_HUP_NOTIFY) ? + if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){ + retval = (port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } - spin_lock_irqsave(&info->irq_spinlock,flags); - usc_get_serial_signals(info); - spin_unlock_irqrestore(&info->irq_spinlock,flags); + dcd = tty_port_carrier_raised(&info->port); - if (!(info->port.flags & ASYNC_CLOSING) && - (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) { + if (!(port->flags & ASYNC_CLOSING) && (do_clocal || dcd)) break; - } if (signal_pending(current)) { retval = -ERESTARTSYS; @@ -3370,24 +3341,25 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp, if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):block_til_ready blocking on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(&info->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); + /* FIXME: Racy on hangup during close wait */ if (extra_count) - info->port.count++; - info->port.blocked_open--; + port->count++; + port->blocked_open--; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):block_til_ready after blocking on %s count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); if (!retval) - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return retval; @@ -4304,6 +4276,12 @@ static void mgsl_add_device( struct mgsl_struct *info ) } /* end of mgsl_add_device() */ +static const struct tty_port_operations mgsl_port_ops = { + .carrier_raised = carrier_raised, + .raise_dtr_rts = raise_dtr_rts, +}; + + /* mgsl_allocate_device() * * Allocate and initialize a device instance structure @@ -4322,6 +4300,7 @@ static struct mgsl_struct* mgsl_allocate_device(void) printk("Error can't allocate device instance data\n"); } else { tty_port_init(&info->port); + info->port.ops = &mgsl_port_ops; info->magic = MGSL_MAGIC; INIT_WORK(&info->task, mgsl_bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 08911ed66494..53544e21f191 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -720,44 +720,9 @@ static void close(struct tty_struct *tty, struct file *filp) return; DBGINFO(("%s close entry, count=%d\n", info->device_name, info->port.count)); - if (!info->port.count) - return; - - if (tty_hung_up_p(filp)) - goto cleanup; - - if ((tty->count == 1) && (info->port.count != 1)) { - /* - * tty->count is 1 and the tty structure will be freed. - * info->port.count should be one in this case. - * if it's not, correct it so that the port is shutdown. - */ - DBGERR(("%s close: bad refcount; tty->count=1, " - "info->port.count=%d\n", info->device_name, info->port.count)); - info->port.count = 1; - } - - info->port.count--; - - /* if at least one open remaining, leave hardware active */ - if (info->port.count) + if (tty_port_close_start(&info->port, tty, filp) == 0) goto cleanup; - info->port.flags |= ASYNC_CLOSING; - - /* set tty->closing to notify line discipline to - * only process XON/XOFF characters. Only the N_TTY - * discipline appears to use this (ppp does not). - */ - tty->closing = 1; - - /* wait for transmit data to clear all layers */ - - if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) { - DBGINFO(("%s call tty_wait_until_sent\n", info->device_name)); - tty_wait_until_sent(tty, info->port.closing_wait); - } - if (info->port.flags & ASYNC_INITIALIZED) wait_until_sent(tty, info->timeout); flush_buffer(tty); @@ -765,20 +730,8 @@ static void close(struct tty_struct *tty, struct file *filp) shutdown(info); - tty->closing = 0; + tty_port_close_end(&info->port, tty); info->port.tty = NULL; - - if (info->port.blocked_open) { - if (info->port.close_delay) { - msleep_interruptible(jiffies_to_msecs(info->port.close_delay)); - } - wake_up_interruptible(&info->port.open_wait); - } - - info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - - wake_up_interruptible(&info->port.close_wait); - cleanup: DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->port.count)); } @@ -3132,6 +3085,29 @@ static int tiocmset(struct tty_struct *tty, struct file *file, return 0; } +static int carrier_raised(struct tty_port *port) +{ + unsigned long flags; + struct slgt_info *info = container_of(port, struct slgt_info, port); + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + return (info->signals & SerialSignal_DCD) ? 1 : 0; +} + +static void raise_dtr_rts(struct tty_port *port) +{ + unsigned long flags; + struct slgt_info *info = container_of(port, struct slgt_info, port); + + spin_lock_irqsave(&info->lock,flags); + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); +} + + /* * block current process until the device is ready to open */ @@ -3143,12 +3119,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, bool do_clocal = false; bool extra_count = false; unsigned long flags; + int cd; + struct tty_port *port = &info->port; DBGINFO(("%s block_til_ready\n", tty->driver->name)); if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -3157,46 +3135,38 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, info->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&info->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); spin_lock_irqsave(&info->lock, flags); if (!tty_hung_up_p(filp)) { extra_count = true; - info->port.count--; + port->count--; } spin_unlock_irqrestore(&info->lock, flags); - info->port.blocked_open++; + port->blocked_open++; while (1) { - if ((tty->termios->c_cflag & CBAUD)) { - spin_lock_irqsave(&info->lock,flags); - info->signals |= SerialSignal_RTS + SerialSignal_DTR; - set_signals(info); - spin_unlock_irqrestore(&info->lock,flags); - } + if ((tty->termios->c_cflag & CBAUD)) + tty_port_raise_dtr_rts(port); set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){ - retval = (info->port.flags & ASYNC_HUP_NOTIFY) ? + if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){ + retval = (port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } - spin_lock_irqsave(&info->lock,flags); - get_signals(info); - spin_unlock_irqrestore(&info->lock,flags); + cd = tty_port_carrier_raised(port); - if (!(info->port.flags & ASYNC_CLOSING) && - (do_clocal || (info->signals & SerialSignal_DCD)) ) { + if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd )) break; - } if (signal_pending(current)) { retval = -ERESTARTSYS; @@ -3208,14 +3178,14 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, } set_current_state(TASK_RUNNING); - remove_wait_queue(&info->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); if (extra_count) - info->port.count++; - info->port.blocked_open--; + port->count++; + port->blocked_open--; if (!retval) - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval)); return retval; @@ -3444,6 +3414,11 @@ static void add_device(struct slgt_info *info) #endif } +static const struct tty_port_operations slgt_port_ops = { + .carrier_raised = carrier_raised, + .raise_dtr_rts = raise_dtr_rts, +}; + /* * allocate device instance structure, return NULL on failure */ @@ -3458,6 +3433,7 @@ static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev driver_name, adapter_num, port_num)); } else { tty_port_init(&info->port); + info->port.ops = &slgt_port_ops; info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 6bdb44f7bec2..7b0c5b2dd263 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -558,6 +558,7 @@ static void release_resources(SLMP_INFO *info); static int startup(SLMP_INFO *info); static int block_til_ready(struct tty_struct *tty, struct file * filp,SLMP_INFO *info); +static int carrier_raised(struct tty_port *port); static void shutdown(SLMP_INFO *info); static void program_hw(SLMP_INFO *info); static void change_params(SLMP_INFO *info); @@ -800,7 +801,7 @@ cleanup: */ static void close(struct tty_struct *tty, struct file *filp) { - SLMP_INFO * info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO * info = tty->driver_data; if (sanity_check(info, tty->name, "close")) return; @@ -809,70 +810,18 @@ static void close(struct tty_struct *tty, struct file *filp) printk("%s(%d):%s close() entry, count=%d\n", __FILE__,__LINE__, info->device_name, info->port.count); - if (!info->port.count) - return; - - if (tty_hung_up_p(filp)) - goto cleanup; - - if ((tty->count == 1) && (info->port.count != 1)) { - /* - * tty->count is 1 and the tty structure will be freed. - * info->port.count should be one in this case. - * if it's not, correct it so that the port is shutdown. - */ - printk("%s(%d):%s close: bad refcount; tty->count is 1, " - "info->port.count is %d\n", - __FILE__,__LINE__, info->device_name, info->port.count); - info->port.count = 1; - } - - info->port.count--; - - /* if at least one open remaining, leave hardware active */ - if (info->port.count) + if (tty_port_close_start(&info->port, tty, filp) == 0) goto cleanup; - - info->port.flags |= ASYNC_CLOSING; - - /* set tty->closing to notify line discipline to - * only process XON/XOFF characters. Only the N_TTY - * discipline appears to use this (ppp does not). - */ - tty->closing = 1; - - /* wait for transmit data to clear all layers */ - - if (info->port.closing_wait != ASYNC_CLOSING_WAIT_NONE) { - if (debug_level >= DEBUG_LEVEL_INFO) - printk("%s(%d):%s close() calling tty_wait_until_sent\n", - __FILE__,__LINE__, info->device_name ); - tty_wait_until_sent(tty, info->port.closing_wait); - } - + if (info->port.flags & ASYNC_INITIALIZED) wait_until_sent(tty, info->timeout); flush_buffer(tty); - tty_ldisc_flush(tty); - shutdown(info); - tty->closing = 0; + tty_port_close_end(&info->port, tty); info->port.tty = NULL; - - if (info->port.blocked_open) { - if (info->port.close_delay) { - msleep_interruptible(jiffies_to_msecs(info->port.close_delay)); - } - wake_up_interruptible(&info->port.open_wait); - } - - info->port.flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING); - - wake_up_interruptible(&info->port.close_wait); - cleanup: if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s close() exit, count=%d\n", __FILE__,__LINE__, @@ -884,7 +833,7 @@ cleanup: */ static void hangup(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s hangup()\n", @@ -907,7 +856,7 @@ static void hangup(struct tty_struct *tty) */ static void set_termios(struct tty_struct *tty, struct ktermios *old_termios) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -960,7 +909,7 @@ static int write(struct tty_struct *tty, const unsigned char *buf, int count) { int c, ret = 0; - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1038,7 +987,7 @@ cleanup: */ static int put_char(struct tty_struct *tty, unsigned char ch) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; int ret = 0; @@ -1075,7 +1024,7 @@ static int put_char(struct tty_struct *tty, unsigned char ch) */ static void send_xchar(struct tty_struct *tty, char ch) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1099,7 +1048,7 @@ static void send_xchar(struct tty_struct *tty, char ch) */ static void wait_until_sent(struct tty_struct *tty, int timeout) { - SLMP_INFO * info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO * info = tty->driver_data; unsigned long orig_jiffies, char_time; if (!info ) @@ -1166,7 +1115,7 @@ exit: */ static int write_room(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; int ret; if (sanity_check(info, tty->name, "write_room")) @@ -1193,7 +1142,7 @@ static int write_room(struct tty_struct *tty) */ static void flush_chars(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if ( debug_level >= DEBUG_LEVEL_INFO ) @@ -1232,7 +1181,7 @@ static void flush_chars(struct tty_struct *tty) */ static void flush_buffer(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1254,7 +1203,7 @@ static void flush_buffer(struct tty_struct *tty) */ static void tx_hold(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (sanity_check(info, tty->name, "tx_hold")) @@ -1274,7 +1223,7 @@ static void tx_hold(struct tty_struct *tty) */ static void tx_release(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (sanity_check(info, tty->name, "tx_release")) @@ -1304,7 +1253,7 @@ static void tx_release(struct tty_struct *tty) static int do_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; int error; struct mgsl_icount cnow; /* kernel counter temps */ struct serial_icounter_struct __user *p_cuser; /* user space */ @@ -1515,7 +1464,7 @@ done: */ static int chars_in_buffer(struct tty_struct *tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; if (sanity_check(info, tty->name, "chars_in_buffer")) return 0; @@ -1531,7 +1480,7 @@ static int chars_in_buffer(struct tty_struct *tty) */ static void throttle(struct tty_struct * tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1556,7 +1505,7 @@ static void throttle(struct tty_struct * tty) */ static void unthrottle(struct tty_struct * tty) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -1587,7 +1536,7 @@ static void unthrottle(struct tty_struct * tty) static int set_break(struct tty_struct *tty, int break_state) { unsigned char RegValue; - SLMP_INFO * info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO * info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -3269,7 +3218,7 @@ static int modem_input_wait(SLMP_INFO *info,int arg) */ static int tiocmget(struct tty_struct *tty, struct file *file) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned int result; unsigned long flags; @@ -3295,7 +3244,7 @@ static int tiocmget(struct tty_struct *tty, struct file *file) static int tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { - SLMP_INFO *info = (SLMP_INFO *)tty->driver_data; + SLMP_INFO *info = tty->driver_data; unsigned long flags; if (debug_level >= DEBUG_LEVEL_INFO) @@ -3318,7 +3267,28 @@ static int tiocmset(struct tty_struct *tty, struct file *file, return 0; } +static int carrier_raised(struct tty_port *port) +{ + SLMP_INFO *info = container_of(port, SLMP_INFO, port); + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + get_signals(info); + spin_unlock_irqrestore(&info->lock,flags); + return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; +} + +static void raise_dtr_rts(struct tty_port *port) +{ + SLMP_INFO *info = container_of(port, SLMP_INFO, port); + unsigned long flags; + + spin_lock_irqsave(&info->lock,flags); + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + set_signals(info); + spin_unlock_irqrestore(&info->lock,flags); +} /* Block the current process until the specified port is ready to open. */ @@ -3330,6 +3300,8 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, bool do_clocal = false; bool extra_count = false; unsigned long flags; + int cd; + struct tty_port *port = &info->port; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s block_til_ready()\n", @@ -3338,7 +3310,7 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){ /* nonblock mode is set or port is not enabled */ /* just verify that callout device is not active */ - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return 0; } @@ -3347,50 +3319,42 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, /* Wait for carrier detect and the line to become * free (i.e., not in use by the callout). While we are in - * this loop, info->port.count is dropped by one, so that + * this loop, port->count is dropped by one, so that * close() knows when to free things. We restore it upon * exit, either normal or abnormal. */ retval = 0; - add_wait_queue(&info->port.open_wait, &wait); + add_wait_queue(&port->open_wait, &wait); if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s block_til_ready() before block, count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); spin_lock_irqsave(&info->lock, flags); if (!tty_hung_up_p(filp)) { extra_count = true; - info->port.count--; + port->count--; } spin_unlock_irqrestore(&info->lock, flags); - info->port.blocked_open++; + port->blocked_open++; while (1) { - if ((tty->termios->c_cflag & CBAUD)) { - spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; - set_signals(info); - spin_unlock_irqrestore(&info->lock,flags); - } + if (tty->termios->c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); set_current_state(TASK_INTERRUPTIBLE); - if (tty_hung_up_p(filp) || !(info->port.flags & ASYNC_INITIALIZED)){ - retval = (info->port.flags & ASYNC_HUP_NOTIFY) ? + if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)){ + retval = (port->flags & ASYNC_HUP_NOTIFY) ? -EAGAIN : -ERESTARTSYS; break; } - spin_lock_irqsave(&info->lock,flags); - get_signals(info); - spin_unlock_irqrestore(&info->lock,flags); + cd = tty_port_carrier_raised(port); - if (!(info->port.flags & ASYNC_CLOSING) && - (do_clocal || (info->serial_signals & SerialSignal_DCD)) ) { + if (!(port->flags & ASYNC_CLOSING) && (do_clocal || cd)) break; - } if (signal_pending(current)) { retval = -ERESTARTSYS; @@ -3399,24 +3363,24 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp, if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s block_til_ready() count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); schedule(); } set_current_state(TASK_RUNNING); - remove_wait_queue(&info->port.open_wait, &wait); + remove_wait_queue(&port->open_wait, &wait); if (extra_count) - info->port.count++; - info->port.blocked_open--; + port->count++; + port->blocked_open--; if (debug_level >= DEBUG_LEVEL_INFO) printk("%s(%d):%s block_til_ready() after, count=%d\n", - __FILE__,__LINE__, tty->driver->name, info->port.count ); + __FILE__,__LINE__, tty->driver->name, port->count ); if (!retval) - info->port.flags |= ASYNC_NORMAL_ACTIVE; + port->flags |= ASYNC_NORMAL_ACTIVE; return retval; } @@ -3782,6 +3746,11 @@ static void add_device(SLMP_INFO *info) #endif } +static const struct tty_port_operations port_ops = { + .carrier_raised = carrier_raised, + .raise_dtr_rts = raise_dtr_rts, +}; + /* Allocate and initialize a device instance structure * * Return Value: pointer to SLMP_INFO if success, otherwise NULL @@ -3798,6 +3767,7 @@ static SLMP_INFO *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev) __FILE__,__LINE__, adapter_num, port_num); } else { tty_port_init(&info->port); + info->port.ops = &port_ops; info->magic = MGSL_MAGIC; INIT_WORK(&info->task, bh_handler); info->max_frame_size = 4096; @@ -3940,6 +3910,7 @@ static const struct tty_operations ops = { .tiocmset = tiocmset, }; + static void synclinkmp_cleanup(void) { int rc; diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index db15f9ba7c0b..d33e5ab06177 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1111,9 +1111,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) * Locks the line discipline as required * Writes to the tty driver are serialized by the atomic_write_lock * and are then processed in chunks to the device. The line discipline - * write method will not be involked in parallel for each device - * The line discipline write method is called under the big - * kernel lock for historical reasons. New code should not rely on this. + * write method will not be invoked in parallel for each device. */ static ssize_t tty_write(struct file *file, const char __user *buf, @@ -1213,7 +1211,7 @@ static void tty_line_name(struct tty_driver *driver, int index, char *p) * be held until the 'fast-open' is also done. Will change once we * have refcounting in the driver and per driver locking */ -struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, +static struct tty_struct *tty_driver_lookup_tty(struct tty_driver *driver, struct inode *inode, int idx) { struct tty_struct *tty; @@ -2050,7 +2048,6 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) /** * tty_do_resize - resize event * @tty: tty being resized - * @real_tty: real tty (not the same as tty if using a pty/tty pair) * @rows: rows (character) * @cols: cols (character) * @@ -2058,41 +2055,34 @@ static int tiocgwinsz(struct tty_struct *tty, struct winsize __user *arg) * peform a terminal resize correctly */ -int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws) +int tty_do_resize(struct tty_struct *tty, struct winsize *ws) { - struct pid *pgrp, *rpgrp; + struct pid *pgrp; unsigned long flags; - /* For a PTY we need to lock the tty side */ - mutex_lock(&real_tty->termios_mutex); - if (!memcmp(ws, &real_tty->winsize, sizeof(*ws))) + /* Lock the tty */ + mutex_lock(&tty->termios_mutex); + if (!memcmp(ws, &tty->winsize, sizeof(*ws))) goto done; /* Get the PID values and reference them so we can avoid holding the tty ctrl lock while sending signals */ spin_lock_irqsave(&tty->ctrl_lock, flags); pgrp = get_pid(tty->pgrp); - rpgrp = get_pid(real_tty->pgrp); spin_unlock_irqrestore(&tty->ctrl_lock, flags); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); - if (rpgrp != pgrp && rpgrp) - kill_pgrp(rpgrp, SIGWINCH, 1); - put_pid(pgrp); - put_pid(rpgrp); tty->winsize = *ws; - real_tty->winsize = *ws; done: - mutex_unlock(&real_tty->termios_mutex); + mutex_unlock(&tty->termios_mutex); return 0; } /** * tiocswinsz - implement window size set ioctl - * @tty; tty + * @tty; tty side of tty * @arg: user buffer for result * * Copies the user idea of the window size to the kernel. Traditionally @@ -2105,17 +2095,16 @@ done: * then calls into the default method. */ -static int tiocswinsz(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize __user *arg) +static int tiocswinsz(struct tty_struct *tty, struct winsize __user *arg) { struct winsize tmp_ws; if (copy_from_user(&tmp_ws, arg, sizeof(*arg))) return -EFAULT; if (tty->ops->resize) - return tty->ops->resize(tty, real_tty, &tmp_ws); + return tty->ops->resize(tty, &tmp_ws); else - return tty_do_resize(tty, real_tty, &tmp_ws); + return tty_do_resize(tty, &tmp_ws); } /** @@ -2540,7 +2529,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGWINSZ: return tiocgwinsz(real_tty, p); case TIOCSWINSZ: - return tiocswinsz(tty, real_tty, p); + return tiocswinsz(real_tty, p); case TIOCCONS: return real_tty != tty ? -EINVAL : tioccons(file); case FIONBIO: @@ -2785,6 +2774,8 @@ void initialize_tty_struct(struct tty_struct *tty, INIT_WORK(&tty->hangup_work, do_tty_hangup); mutex_init(&tty->atomic_read_lock); mutex_init(&tty->atomic_write_lock); + mutex_init(&tty->output_lock); + mutex_init(&tty->echo_lock); spin_lock_init(&tty->read_lock); spin_lock_init(&tty->ctrl_lock); INIT_LIST_HEAD(&tty->tty_files); diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index f307f135cbfb..7a84b406a952 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -316,8 +316,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - if (tty->ldisc.refcount == 0) - printk(KERN_ERR "tty_ldisc_ref_wait\n"); + WARN_ON(tty->ldisc.refcount == 0); return &tty->ldisc; } @@ -376,15 +375,17 @@ EXPORT_SYMBOL_GPL(tty_ldisc_deref); * @tty: terminal to activate ldisc on * * Set the TTY_LDISC flag when the line discipline can be called - * again. Do necessary wakeups for existing sleepers. + * again. Do necessary wakeups for existing sleepers. Clear the LDISC + * changing flag to indicate any ldisc change is now over. * - * Note: nobody should set this bit except via this function. Clearing - * directly is allowed. + * Note: nobody should set the TTY_LDISC bit except via this function. + * Clearing directly is allowed. */ void tty_ldisc_enable(struct tty_struct *tty) { set_bit(TTY_LDISC, &tty->flags); + clear_bit(TTY_LDISC_CHANGING, &tty->flags); wake_up(&tty_ldisc_wait); } @@ -496,7 +497,14 @@ restart: * reference to the line discipline. The TTY_LDISC bit * prevents anyone taking a reference once it is clear. * We need the lock to avoid racing reference takers. + * + * We must clear the TTY_LDISC bit here to avoid a livelock + * with a userspace app continually trying to use the tty in + * parallel to the change and re-referencing the tty. */ + clear_bit(TTY_LDISC, &tty->flags); + if (o_tty) + clear_bit(TTY_LDISC, &o_tty->flags); spin_lock_irqsave(&tty_ldisc_lock, flags); if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { @@ -528,7 +536,7 @@ restart: * If the TTY_LDISC bit is set, then we are racing against * another ldisc change */ - if (!test_bit(TTY_LDISC, &tty->flags)) { + if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { struct tty_ldisc *ld; spin_unlock_irqrestore(&tty_ldisc_lock, flags); tty_ldisc_put(new_ldisc.ops); @@ -536,10 +544,14 @@ restart: tty_ldisc_deref(ld); goto restart; } - - clear_bit(TTY_LDISC, &tty->flags); + /* + * This flag is used to avoid two parallel ldisc changes. Once + * open and close are fine grained locked this may work better + * as a mutex shared with the open/close/hup paths + */ + set_bit(TTY_LDISC_CHANGING, &tty->flags); if (o_tty) - clear_bit(TTY_LDISC, &o_tty->flags); + set_bit(TTY_LDISC_CHANGING, &o_tty->flags); spin_unlock_irqrestore(&tty_ldisc_lock, flags); /* diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index c8f8024cb40e..9b8004c72686 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -7,6 +7,7 @@ #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> +#include <linux/serial.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/slab.h> @@ -94,3 +95,227 @@ void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty) spin_unlock_irqrestore(&port->lock, flags); } EXPORT_SYMBOL(tty_port_tty_set); + +/** + * tty_port_hangup - hangup helper + * @port: tty port + * + * Perform port level tty hangup flag and count changes. Drop the tty + * reference. + */ + +void tty_port_hangup(struct tty_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + port->count = 0; + port->flags &= ~ASYNC_NORMAL_ACTIVE; + if (port->tty) + tty_kref_put(port->tty); + port->tty = NULL; + spin_unlock_irqrestore(&port->lock, flags); + wake_up_interruptible(&port->open_wait); +} +EXPORT_SYMBOL(tty_port_hangup); + +/** + * tty_port_carrier_raised - carrier raised check + * @port: tty port + * + * Wrapper for the carrier detect logic. For the moment this is used + * to hide some internal details. This will eventually become entirely + * internal to the tty port. + */ + +int tty_port_carrier_raised(struct tty_port *port) +{ + if (port->ops->carrier_raised == NULL) + return 1; + return port->ops->carrier_raised(port); +} +EXPORT_SYMBOL(tty_port_carrier_raised); + +/** + * tty_port_raise_dtr_rts - Riase DTR/RTS + * @port: tty port + * + * Wrapper for the DTR/RTS raise logic. For the moment this is used + * to hide some internal details. This will eventually become entirely + * internal to the tty port. + */ + +void tty_port_raise_dtr_rts(struct tty_port *port) +{ + if (port->ops->raise_dtr_rts) + port->ops->raise_dtr_rts(port); +} +EXPORT_SYMBOL(tty_port_raise_dtr_rts); + +/** + * tty_port_block_til_ready - Waiting logic for tty open + * @port: the tty port being opened + * @tty: the tty device being bound + * @filp: the file pointer of the opener + * + * Implement the core POSIX/SuS tty behaviour when opening a tty device. + * Handles: + * - hangup (both before and during) + * - non blocking open + * - rts/dtr/dcd + * - signals + * - port flags and counts + * + * The passed tty_port must implement the carrier_raised method if it can + * do carrier detect and the raise_dtr_rts method if it supports software + * management of these lines. Note that the dtr/rts raise is done each + * iteration as a hangup may have previously dropped them while we wait. + */ + +int tty_port_block_til_ready(struct tty_port *port, + struct tty_struct *tty, struct file *filp) +{ + int do_clocal = 0, retval; + unsigned long flags; + DECLARE_WAITQUEUE(wait, current); + int cd; + + /* block if port is in the process of being closed */ + if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { + interruptible_sleep_on(&port->close_wait); + if (port->flags & ASYNC_HUP_NOTIFY) + return -EAGAIN; + else + return -ERESTARTSYS; + } + + /* if non-blocking mode is set we can pass directly to open unless + the port has just hung up or is in another error state */ + if ((filp->f_flags & O_NONBLOCK) || + (tty->flags & (1 << TTY_IO_ERROR))) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (C_CLOCAL(tty)) + do_clocal = 1; + + /* Block waiting until we can proceed. We may need to wait for the + carrier, but we must also wait for any close that is in progress + before the next open may complete */ + + retval = 0; + add_wait_queue(&port->open_wait, &wait); + + /* The port lock protects the port counts */ + spin_lock_irqsave(&port->lock, flags); + if (!tty_hung_up_p(filp)) + port->count--; + port->blocked_open++; + spin_unlock_irqrestore(&port->lock, flags); + + while (1) { + /* Indicate we are open */ + if (tty->termios->c_cflag & CBAUD) + tty_port_raise_dtr_rts(port); + + set_current_state(TASK_INTERRUPTIBLE); + /* Check for a hangup or uninitialised port. Return accordingly */ + if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) { + if (port->flags & ASYNC_HUP_NOTIFY) + retval = -EAGAIN; + else + retval = -ERESTARTSYS; + break; + } + /* Probe the carrier. For devices with no carrier detect this + will always return true */ + cd = tty_port_carrier_raised(port); + if (!(port->flags & ASYNC_CLOSING) && + (do_clocal || cd)) + break; + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + schedule(); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&port->open_wait, &wait); + + /* Update counts. A parallel hangup will have set count to zero and + we must not mess that up further */ + spin_lock_irqsave(&port->lock, flags); + if (!tty_hung_up_p(filp)) + port->count++; + port->blocked_open--; + if (retval == 0) + port->flags |= ASYNC_NORMAL_ACTIVE; + spin_unlock_irqrestore(&port->lock, flags); + return 0; + +} +EXPORT_SYMBOL(tty_port_block_til_ready); + +int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + if (tty_hung_up_p(filp)) { + spin_unlock_irqrestore(&port->lock, flags); + return 0; + } + + if( tty->count == 1 && port->count != 1) { + printk(KERN_WARNING + "tty_port_close_start: tty->count = 1 port count = %d.\n", + port->count); + port->count = 1; + } + if (--port->count < 0) { + printk(KERN_WARNING "tty_port_close_start: count = %d\n", + port->count); + port->count = 0; + } + + if (port->count) { + spin_unlock_irqrestore(&port->lock, flags); + return 0; + } + port->flags |= ASYNC_CLOSING; + tty->closing = 1; + spin_unlock_irqrestore(&port->lock, flags); + /* Don't block on a stalled port, just pull the chain */ + if (tty->flow_stopped) + tty_driver_flush_buffer(tty); + if (port->flags & ASYNC_INITIALIZED && + port->closing_wait != ASYNC_CLOSING_WAIT_NONE) + tty_wait_until_sent(tty, port->closing_wait); + return 1; +} +EXPORT_SYMBOL(tty_port_close_start); + +void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) +{ + unsigned long flags; + + tty_ldisc_flush(tty); + + spin_lock_irqsave(&port->lock, flags); + tty->closing = 0; + + if (port->blocked_open) { + spin_unlock_irqrestore(&port->lock, flags); + if (port->close_delay) { + msleep_interruptible( + jiffies_to_msecs(port->close_delay)); + } + spin_lock_irqsave(&port->lock, flags); + wake_up_interruptible(&port->open_wait); + } + port->flags &= ~(ASYNC_NORMAL_ACTIVE | ASYNC_CLOSING); + wake_up_interruptible(&port->close_wait); + spin_unlock_irqrestore(&port->lock, flags); +} +EXPORT_SYMBOL(tty_port_close_end); diff --git a/drivers/char/vme_scc.c b/drivers/char/vme_scc.c index 1718b3c481db..0e8234bd0e19 100644 --- a/drivers/char/vme_scc.c +++ b/drivers/char/vme_scc.c @@ -69,7 +69,7 @@ static void scc_disable_tx_interrupts(void * ptr); static void scc_enable_tx_interrupts(void * ptr); static void scc_disable_rx_interrupts(void * ptr); static void scc_enable_rx_interrupts(void * ptr); -static int scc_get_CD(void * ptr); +static int scc_carrier_raised(struct tty_port *port); static void scc_shutdown_port(void * ptr); static int scc_set_real_termios(void *ptr); static void scc_hungup(void *ptr); @@ -100,7 +100,6 @@ static struct real_driver scc_real_driver = { scc_enable_tx_interrupts, scc_disable_rx_interrupts, scc_enable_rx_interrupts, - scc_get_CD, scc_shutdown_port, scc_set_real_termios, scc_chars_in_buffer, @@ -129,6 +128,10 @@ static const struct tty_operations scc_ops = { .break_ctl = scc_break_ctl, }; +static const struct tty_port_operations scc_port_ops = { + .carrier_raised = scc_carrier_raised, +}; + /*---------------------------------------------------------------------------- * vme_scc_init() and support functions *---------------------------------------------------------------------------*/ @@ -176,6 +179,8 @@ static void scc_init_portstructs(void) for (i = 0; i < 2; i++) { port = scc_ports + i; + tty_port_init(&port->gs.port); + port->gs.port.ops = &scc_port_ops; port->gs.magic = SCC_MAGIC; port->gs.close_delay = HZ/2; port->gs.closing_wait = 30 * HZ; @@ -624,10 +629,10 @@ static void scc_enable_rx_interrupts(void *ptr) } -static int scc_get_CD(void *ptr) +static int scc_carrier_raised(struct tty_port *port) { - struct scc_port *port = ptr; - unsigned channel = port->channel; + struct scc_port *sc = container_of(port, struct scc_port, gs.port); + unsigned channel = sc->channel; return !!(scc_last_status_reg[channel] & SR_DCD); } @@ -638,7 +643,7 @@ static void scc_shutdown_port(void *ptr) struct scc_port *port = ptr; port->gs.port.flags &= ~ GS_ACTIVE; - if (port->gs.port.tty && port->gs.port.tty->termios->c_cflag & HUPCL) { + if (port->gs.port.tty && (port->gs.port.tty->termios->c_cflag & HUPCL)) { scc_setsignals (port, 0, 0); } } @@ -779,7 +784,7 @@ static void scc_setsignals(struct scc_port *port, int dtr, int rts) static void scc_send_xchar(struct tty_struct *tty, char ch) { - struct scc_port *port = (struct scc_port *)tty->driver_data; + struct scc_port *port = tty->driver_data; port->x_char = ch; if (ch) @@ -896,7 +901,7 @@ static int scc_open (struct tty_struct * tty, struct file * filp) return retval; } - port->c_dcd = scc_get_CD (port); + port->c_dcd = tty_port_carrier_raised(&port->gs.port); scc_enable_rx_interrupts(port); @@ -906,7 +911,7 @@ static int scc_open (struct tty_struct * tty, struct file * filp) static void scc_throttle (struct tty_struct * tty) { - struct scc_port *port = (struct scc_port *)tty->driver_data; + struct scc_port *port = tty->driver_data; unsigned long flags; SCC_ACCESS_INIT(port); @@ -922,7 +927,7 @@ static void scc_throttle (struct tty_struct * tty) static void scc_unthrottle (struct tty_struct * tty) { - struct scc_port *port = (struct scc_port *)tty->driver_data; + struct scc_port *port = tty->driver_data; unsigned long flags; SCC_ACCESS_INIT(port); @@ -945,7 +950,7 @@ static int scc_ioctl(struct tty_struct *tty, struct file *file, static int scc_break_ctl(struct tty_struct *tty, int break_state) { - struct scc_port *port = (struct scc_port *)tty->driver_data; + struct scc_port *port = tty->driver_data; unsigned long flags; SCC_ACCESS_INIT(port); diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 008176edbd64..80014213fb53 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -819,8 +819,8 @@ static inline int resize_screen(struct vc_data *vc, int width, int height, * ctrl_lock of the tty IFF a tty is passed. */ -static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, - struct vc_data *vc, unsigned int cols, unsigned int lines) +static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, + unsigned int cols, unsigned int lines) { unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0; unsigned int old_cols, old_rows, old_row_size, old_screen_size; @@ -932,7 +932,7 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, ws.ws_row = vc->vc_rows; ws.ws_col = vc->vc_cols; ws.ws_ypixel = vc->vc_scan_lines; - tty_do_resize(tty, real_tty, &ws); + tty_do_resize(tty, &ws); } if (CON_IS_VISIBLE(vc)) @@ -954,13 +954,12 @@ static int vc_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows) { - return vc_do_resize(vc->vc_tty, vc->vc_tty, vc, cols, rows); + return vc_do_resize(vc->vc_tty, vc, cols, rows); } /** * vt_resize - resize a VT * @tty: tty to resize - * @real_tty: tty if a pty/tty pair * @ws: winsize attributes * * Resize a virtual terminal. This is called by the tty layer as we @@ -971,14 +970,13 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows) * termios_mutex and the tty ctrl_lock in that order. */ -int vt_resize(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws) +int vt_resize(struct tty_struct *tty, struct winsize *ws) { struct vc_data *vc = tty->driver_data; int ret; acquire_console_sem(); - ret = vc_do_resize(tty, real_tty, vc, ws->ws_col, ws->ws_row); + ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row); release_console_sem(); return ret; } @@ -2679,7 +2677,7 @@ static int con_write_room(struct tty_struct *tty) { if (tty->stopped) return 0; - return 4096; /* No limit, really; we're not buffering */ + return 32768; /* No limit, really; we're not buffering */ } static int con_chars_in_buffer(struct tty_struct *tty) diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 8944ce508e2f..a2dee0eb6dad 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -366,7 +366,7 @@ do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, int perm, struct vc_ int vt_ioctl(struct tty_struct *tty, struct file * file, unsigned int cmd, unsigned long arg) { - struct vc_data *vc = (struct vc_data *)tty->driver_data; + struct vc_data *vc = tty->driver_data; struct console_font_op op; /* used in multiple places here */ struct kbd_struct * kbd; unsigned int console; diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index f450588e5858..254f1064d973 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -154,7 +154,6 @@ static struct tc_clkevt_device clkevt = { .shift = 32, /* Should be lower than at91rm9200's system timer */ .rating = 125, - .cpumask = CPU_MASK_CPU0, .set_next_event = tc_next_event, .set_mode = tc_mode, }, @@ -195,6 +194,7 @@ static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) clkevt.clkevt.max_delta_ns = clockevent_delta2ns(0xffff, &clkevt.clkevt); clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1; + clkevt.clkevt.cpumask = cpumask_of(0); setup_irq(irq, &tc_irqaction); diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index c9f21e3d4ead..4ee85fcf9aaf 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -137,6 +137,7 @@ config BLK_DEV_DELKIN config BLK_DEV_IDECD tristate "Include IDE/ATAPI CDROM support" + select IDE_ATAPI ---help--- If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is a newer protocol used by IDE CD-ROM and TAPE drives, similar to the @@ -185,23 +186,6 @@ config BLK_DEV_IDETAPE To compile this driver as a module, choose M here: the module will be called ide-tape. -config BLK_DEV_IDESCSI - tristate "SCSI emulation support (DEPRECATED)" - depends on SCSI - select IDE_ATAPI - ---help--- - WARNING: ide-scsi is no longer needed for cd writing applications! - The 2.6 kernel supports direct writing to ide-cd, which eliminates - the need for ide-scsi + the entire scsi stack just for writing a - cd. The new method is more efficient in every way. - - This will provide SCSI host adapter emulation for IDE ATAPI devices, - and will allow you to use a SCSI device driver instead of a native - ATAPI driver. - - If both this SCSI emulation and native ATAPI support are compiled - into the kernel, the native support will be used. - config BLK_DEV_IDEACPI bool "IDE ACPI support" depends on ACPI diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 177e3f8523ed..410728992e6a 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -5,7 +5,7 @@ EXTRA_CFLAGS += -Idrivers/ide ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \ - ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o + ide-taskfile.o ide-pm.o ide-park.o ide-pio-blacklist.o ide-sysfs.o # core IDE code ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 4e58b9e7a58a..e8688c0f8645 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -3,6 +3,7 @@ */ #include <linux/kernel.h> +#include <linux/cdrom.h> #include <linux/delay.h> #include <linux/ide.h> #include <scsi/scsi.h> @@ -14,6 +15,13 @@ #define debug_log(fmt, args...) do {} while (0) #endif +#define ATAPI_MIN_CDB_BYTES 12 + +static inline int dev_is_idecd(ide_drive_t *drive) +{ + return drive->media == ide_cdrom || drive->media == ide_optical; +} + /* * Check whether we can support a device, * based on the ATAPI IDENTIFY command results. @@ -233,18 +241,49 @@ void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) } EXPORT_SYMBOL_GPL(ide_retry_pc); -int ide_scsi_expiry(ide_drive_t *drive) +int ide_cd_expiry(ide_drive_t *drive) { - struct ide_atapi_pc *pc = drive->pc; + struct request *rq = HWGROUP(drive)->rq; + unsigned long wait = 0; - debug_log("%s called for %lu at %lu\n", __func__, - pc->scsi_cmd->serial_number, jiffies); + debug_log("%s: rq->cmd[0]: 0x%x\n", __func__, rq->cmd[0]); - pc->flags |= PC_FLAG_TIMEDOUT; + /* + * Some commands are *slow* and normally take a long time to complete. + * Usually we can use the ATAPI "disconnect" to bypass this, but not all + * commands/drives support that. Let ide_timer_expiry keep polling us + * for these. + */ + switch (rq->cmd[0]) { + case GPCMD_BLANK: + case GPCMD_FORMAT_UNIT: + case GPCMD_RESERVE_RZONE_TRACK: + case GPCMD_CLOSE_TRACK: + case GPCMD_FLUSH_CACHE: + wait = ATAPI_WAIT_PC; + break; + default: + if (!(rq->cmd_flags & REQ_QUIET)) + printk(KERN_INFO "cmd 0x%x timed out\n", + rq->cmd[0]); + wait = 0; + break; + } + return wait; +} +EXPORT_SYMBOL_GPL(ide_cd_expiry); - return 0; /* we do not want the IDE subsystem to retry */ +int ide_cd_get_xferlen(struct request *rq) +{ + if (blk_fs_request(rq)) + return 32768; + else if (blk_sense_request(rq) || blk_pc_request(rq) || + rq->cmd_type == REQ_TYPE_ATA_PC) + return rq->data_len; + else + return 0; } -EXPORT_SYMBOL_GPL(ide_scsi_expiry); +EXPORT_SYMBOL_GPL(ide_cd_get_xferlen); /* * This is the usual interrupt handler which will be called during a packet @@ -258,21 +297,14 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct request *rq = hwif->hwgroup->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; xfer_func_t *xferfunc; - ide_expiry_t *expiry; unsigned int timeout, temp; u16 bcount; - u8 stat, ireason, scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI), dsc = 0; + u8 stat, ireason, dsc = 0; debug_log("Enter %s - interrupt handler\n", __func__); - if (scsi) { - timeout = ide_scsi_get_timeout(pc); - expiry = ide_scsi_expiry; - } else { - timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD - : WAIT_TAPE_CMD; - expiry = NULL; - } + timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD + : WAIT_TAPE_CMD; if (pc->flags & PC_FLAG_TIMEDOUT) { drive->pc_callback(drive, 0); @@ -284,8 +316,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { if (hwif->dma_ops->dma_end(drive) || - (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) { - if (drive->media == ide_floppy && !scsi) + (drive->media == ide_tape && (stat & ATA_ERR))) { + if (drive->media == ide_floppy) printk(KERN_ERR "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) ? "write" : "read"); @@ -307,7 +339,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); - if (drive->media == ide_tape && !scsi && + if (drive->media == ide_tape && (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE) stat &= ~ATA_ERR; @@ -315,11 +347,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* Error detected */ debug_log("%s: I/O error\n", drive->name); - if (drive->media != ide_tape || scsi) { + if (drive->media != ide_tape) pc->rq->errors++; - if (scsi) - goto cmd_finished; - } if (rq->cmd[0] == REQUEST_SENSE) { printk(KERN_ERR "%s: I/O error in request sense" @@ -335,7 +364,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) /* queued, but not started */ return ide_stopped; } -cmd_finished: pc->error = 0; if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) @@ -382,25 +410,8 @@ cmd_finished: "us more data than expected - " "discarding data\n", drive->name); - if (scsi) - temp = pc->buf_size - pc->xferred; - else - temp = 0; - if (temp) { - if (pc->sg) - drive->pc_io_buffers(drive, pc, - temp, 0); - else - tp_ops->input_data(drive, NULL, - pc->cur_pos, temp); - printk(KERN_ERR "%s: transferred %d of " - "%d bytes\n", - drive->name, - temp, bcount); - } - pc->xferred += temp; - pc->cur_pos += temp; - ide_pad_transfer(drive, 0, bcount - temp); + + ide_pad_transfer(drive, 0, bcount); goto next_irq; } debug_log("The device wants to send us more data than " @@ -410,14 +421,13 @@ cmd_finished: } else xferfunc = tp_ops->output_data; - if ((drive->media == ide_floppy && !scsi && !pc->buf) || - (drive->media == ide_tape && !scsi && pc->bh) || - (scsi && pc->sg)) { + if ((drive->media == ide_floppy && !pc->buf) || + (drive->media == ide_tape && pc->bh)) { int done = drive->pc_io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING)); /* FIXME: don't do partial completions */ - if (drive->media == ide_floppy && !scsi) + if (drive->media == ide_floppy) ide_end_request(drive, 1, done >> 9); } else xferfunc(drive, NULL, pc->cur_pos, bcount); @@ -430,7 +440,7 @@ cmd_finished: rq->cmd[0], bcount); next_irq: /* And set the interrupt handler again */ - ide_set_handler(drive, ide_pc_intr, timeout, expiry); + ide_set_handler(drive, ide_pc_intr, timeout, NULL); return ide_started; } @@ -479,11 +489,12 @@ static int ide_delayed_transfer_pc(ide_drive_t *drive) static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) { - struct ide_atapi_pc *pc = drive->pc; + struct ide_atapi_pc *uninitialized_var(pc); ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->hwgroup->rq; ide_expiry_t *expiry; unsigned int timeout; + int cmd_len; ide_startstop_t startstop; u8 ireason; @@ -493,101 +504,124 @@ static ide_startstop_t ide_transfer_pc(ide_drive_t *drive) return startstop; } - ireason = ide_read_ireason(drive); - if (drive->media == ide_tape && - (drive->dev_flags & IDE_DFLAG_SCSI) == 0) - ireason = ide_wait_ireason(drive, ireason); - - if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) { - printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " - "a packet command\n", drive->name); - return ide_do_reset(drive); + if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { + if (drive->dma) + drive->waiting_for_dma = 1; } - /* - * If necessary schedule the packet transfer to occur 'timeout' - * miliseconds later in ide_delayed_transfer_pc() after the device - * says it's ready for a packet. - */ - if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) { - timeout = drive->pc_delay; - expiry = &ide_delayed_transfer_pc; + if (dev_is_idecd(drive)) { + /* ATAPI commands get padded out to 12 bytes minimum */ + cmd_len = COMMAND_SIZE(rq->cmd[0]); + if (cmd_len < ATAPI_MIN_CDB_BYTES) + cmd_len = ATAPI_MIN_CDB_BYTES; + + timeout = rq->timeout; + expiry = ide_cd_expiry; } else { - if (drive->dev_flags & IDE_DFLAG_SCSI) { - timeout = ide_scsi_get_timeout(pc); - expiry = ide_scsi_expiry; + pc = drive->pc; + + cmd_len = ATAPI_MIN_CDB_BYTES; + + /* + * If necessary schedule the packet transfer to occur 'timeout' + * miliseconds later in ide_delayed_transfer_pc() after the + * device says it's ready for a packet. + */ + if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) { + timeout = drive->pc_delay; + expiry = &ide_delayed_transfer_pc; } else { timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD : WAIT_TAPE_CMD; expiry = NULL; } + + ireason = ide_read_ireason(drive); + if (drive->media == ide_tape) + ireason = ide_wait_ireason(drive, ireason); + + if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) { + printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " + "a packet command\n", drive->name); + + return ide_do_reset(drive); + } } /* Set the interrupt routine */ ide_set_handler(drive, ide_pc_intr, timeout, expiry); /* Begin DMA, if necessary */ - if (pc->flags & PC_FLAG_DMA_OK) { - pc->flags |= PC_FLAG_DMA_IN_PROGRESS; - hwif->dma_ops->dma_start(drive); + if (dev_is_idecd(drive)) { + if (drive->dma) + hwif->dma_ops->dma_start(drive); + } else { + if (pc->flags & PC_FLAG_DMA_OK) { + pc->flags |= PC_FLAG_DMA_IN_PROGRESS; + hwif->dma_ops->dma_start(drive); + } } /* Send the actual packet */ if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0) - hwif->tp_ops->output_data(drive, NULL, rq->cmd, 12); + hwif->tp_ops->output_data(drive, NULL, rq->cmd, cmd_len); return ide_started; } -ide_startstop_t ide_issue_pc(ide_drive_t *drive, unsigned int timeout, - ide_expiry_t *expiry) +ide_startstop_t ide_issue_pc(ide_drive_t *drive) { - struct ide_atapi_pc *pc = drive->pc; + struct ide_atapi_pc *pc; ide_hwif_t *hwif = drive->hwif; + ide_expiry_t *expiry = NULL; + unsigned int timeout; u32 tf_flags; u16 bcount; - u8 scsi = !!(drive->dev_flags & IDE_DFLAG_SCSI); - /* We haven't transferred any data yet */ - pc->xferred = 0; - pc->cur_pos = pc->buf; + if (dev_is_idecd(drive)) { + tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL; + bcount = ide_cd_get_xferlen(hwif->hwgroup->rq); + expiry = ide_cd_expiry; + timeout = ATAPI_WAIT_PC; - /* Request to transfer the entire buffer at once */ - if (drive->media == ide_tape && scsi == 0) - bcount = pc->req_xfer; - else - bcount = min(pc->req_xfer, 63 * 1024); + if (drive->dma) + drive->dma = !hwif->dma_ops->dma_setup(drive); + } else { + pc = drive->pc; - if (pc->flags & PC_FLAG_DMA_ERROR) { - pc->flags &= ~PC_FLAG_DMA_ERROR; - ide_dma_off(drive); - } + /* We haven't transferred any data yet */ + pc->xferred = 0; + pc->cur_pos = pc->buf; - if ((pc->flags & PC_FLAG_DMA_OK) && - (drive->dev_flags & IDE_DFLAG_USING_DMA)) { - if (scsi) - hwif->sg_mapped = 1; - drive->dma = !hwif->dma_ops->dma_setup(drive); - if (scsi) - hwif->sg_mapped = 0; - } + tf_flags = IDE_TFLAG_OUT_DEVICE; + bcount = ((drive->media == ide_tape) ? + pc->req_xfer : + min(pc->req_xfer, 63 * 1024)); - if (!drive->dma) - pc->flags &= ~PC_FLAG_DMA_OK; + if (pc->flags & PC_FLAG_DMA_ERROR) { + pc->flags &= ~PC_FLAG_DMA_ERROR; + ide_dma_off(drive); + } - if (scsi) - tf_flags = 0; - else if (drive->media == ide_cdrom || drive->media == ide_optical) - tf_flags = IDE_TFLAG_OUT_NSECT | IDE_TFLAG_OUT_LBAL; - else - tf_flags = IDE_TFLAG_OUT_DEVICE; + if ((pc->flags & PC_FLAG_DMA_OK) && + (drive->dev_flags & IDE_DFLAG_USING_DMA)) + drive->dma = !hwif->dma_ops->dma_setup(drive); + + if (!drive->dma) + pc->flags &= ~PC_FLAG_DMA_OK; + + timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD + : WAIT_TAPE_CMD; + } ide_pktcmd_tf_load(drive, tf_flags, bcount, drive->dma); /* Issue the packet command */ if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { + if (drive->dma) + drive->waiting_for_dma = 0; ide_execute_command(drive, ATA_CMD_PACKET, ide_transfer_pc, - timeout, NULL); + timeout, expiry); return ide_started; } else { ide_execute_pkt_cmd(drive); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 5daa4dd1b018..1a7410f88249 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -53,14 +53,6 @@ #include "ide-cd.h" -#define IDECD_DEBUG_LOG 1 - -#if IDECD_DEBUG_LOG -#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args) -#else -#define ide_debug_log(lvl, fmt, args...) do {} while (0) -#endif - static DEFINE_MUTEX(idecd_ref_mutex); static void ide_cd_release(struct kref *); @@ -519,37 +511,8 @@ end_request: return 1; } -static int cdrom_timer_expiry(ide_drive_t *drive) -{ - struct request *rq = HWGROUP(drive)->rq; - unsigned long wait = 0; - - ide_debug_log(IDE_DBG_RQ, "Call %s: rq->cmd[0]: 0x%x\n", __func__, - rq->cmd[0]); - - /* - * Some commands are *slow* and normally take a long time to complete. - * Usually we can use the ATAPI "disconnect" to bypass this, but not all - * commands/drives support that. Let ide_timer_expiry keep polling us - * for these. - */ - switch (rq->cmd[0]) { - case GPCMD_BLANK: - case GPCMD_FORMAT_UNIT: - case GPCMD_RESERVE_RZONE_TRACK: - case GPCMD_CLOSE_TRACK: - case GPCMD_FLUSH_CACHE: - wait = ATAPI_WAIT_PC; - break; - default: - if (!(rq->cmd_flags & REQ_QUIET)) - printk(KERN_INFO PFX "cmd 0x%x timed out\n", - rq->cmd[0]); - wait = 0; - break; - } - return wait; -} +static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *); +static ide_startstop_t cdrom_newpc_intr(ide_drive_t *); /* * Set up the device registers for transferring a packet command on DEV, @@ -559,11 +522,13 @@ static int cdrom_timer_expiry(ide_drive_t *drive) * called when the interrupt from the drive arrives. Otherwise, HANDLER * will be called immediately after the drive is prepared for the transfer. */ -static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, - int xferlen, - ide_handler_t *handler) +static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct request *rq = hwif->hwgroup->rq; + int xferlen; + + xferlen = ide_cd_get_xferlen(rq); ide_debug_log(IDE_DBG_PC, "Call %s, xferlen: %d\n", __func__, xferlen); @@ -581,13 +546,14 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, drive->waiting_for_dma = 0; /* packet command */ - ide_execute_command(drive, ATA_CMD_PACKET, handler, - ATAPI_WAIT_PC, cdrom_timer_expiry); + ide_execute_command(drive, ATA_CMD_PACKET, + cdrom_transfer_packet_command, + ATAPI_WAIT_PC, ide_cd_expiry); return ide_started; } else { ide_execute_pkt_cmd(drive); - return (*handler) (drive); + return cdrom_transfer_packet_command(drive); } } @@ -598,11 +564,10 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, * there's data ready. */ #define ATAPI_MIN_CDB_BYTES 12 -static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, - struct request *rq, - ide_handler_t *handler) +static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; + struct request *rq = hwif->hwgroup->rq; int cmd_len; ide_startstop_t startstop; @@ -629,7 +594,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, } /* arm the interrupt handler */ - ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry); + ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, ide_cd_expiry); /* ATAPI commands get padded out to 12 bytes minimum */ cmd_len = COMMAND_SIZE(rq->cmd[0]); @@ -717,8 +682,6 @@ static int ide_cd_check_transfer_size(ide_drive_t *drive, int len) return 1; } -static ide_startstop_t cdrom_newpc_intr(ide_drive_t *); - static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, struct request *rq) { @@ -761,20 +724,6 @@ static ide_startstop_t ide_cd_prepare_rw_request(ide_drive_t *drive, } /* - * Routine to send a read/write packet command to the drive. This is usually - * called directly from cdrom_start_{read,write}(). However, for drq_interrupt - * devices, it is called from an interrupt when the drive is ready to accept - * the command. - */ -static ide_startstop_t cdrom_start_rw_cont(ide_drive_t *drive) -{ - struct request *rq = drive->hwif->hwgroup->rq; - - /* send the command to the drive and return */ - return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); -} - -/* * Fix up a possibly partially-processed request so that we can start it over * entirely, or even put it back on the request queue. */ @@ -1096,7 +1045,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) } else { timeout = ATAPI_WAIT_PC; if (!blk_fs_request(rq)) - expiry = cdrom_timer_expiry; + expiry = ide_cd_expiry; } ide_set_handler(drive, cdrom_newpc_intr, timeout, expiry); @@ -1163,13 +1112,6 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) return ide_started; } -static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) -{ - struct request *rq = HWGROUP(drive)->rq; - - return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); -} - static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { @@ -1214,18 +1156,12 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, sector_t block) { - ide_handler_t *fn; - int xferlen; - ide_debug_log(IDE_DBG_RQ, "Call %s, rq->cmd[0]: 0x%x, " "rq->cmd_type: 0x%x, block: %llu\n", __func__, rq->cmd[0], rq->cmd_type, (unsigned long long)block); if (blk_fs_request(rq)) { - xferlen = 32768; - fn = cdrom_start_rw_cont; - if (cdrom_start_rw(drive, rq) == ide_stopped) return ide_stopped; @@ -1233,9 +1169,6 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return ide_stopped; } else if (blk_sense_request(rq) || blk_pc_request(rq) || rq->cmd_type == REQ_TYPE_ATA_PC) { - xferlen = rq->data_len; - fn = cdrom_do_newpc_cont; - if (!rq->timeout) rq->timeout = ATAPI_WAIT_PC; @@ -1250,7 +1183,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, return ide_stopped; } - return cdrom_start_packet_command(drive, xferlen, fn); + return cdrom_start_packet_command(drive); } /* diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index d5ce3362dbd1..bf676b262181 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -8,10 +8,14 @@ #include <linux/cdrom.h> #include <asm/byteorder.h> -/* - * typical timeout for packet command - */ -#define ATAPI_WAIT_PC (60 * HZ) +#define IDECD_DEBUG_LOG 0 + +#if IDECD_DEBUG_LOG +#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, args) +#else +#define ide_debug_log(lvl, fmt, args...) do {} while (0) +#endif + #define ATAPI_WAIT_WRITE_BUSY (10 * HZ) /************************************************************************/ diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index aeb1ad782f54..0a48e2dc53a2 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -197,7 +197,7 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive, pc->retries++; - return ide_issue_pc(drive, WAIT_FLOPPY_CMD, NULL); + return ide_issue_pc(drive); } void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) @@ -342,38 +342,38 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, * Look at the flexible disk page parameters. We ignore the CHS capacity * parameters and use the LBA parameters instead. */ -static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) +static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive, + struct ide_atapi_pc *pc) { struct ide_disk_obj *floppy = drive->driver_data; struct gendisk *disk = floppy->disk; - struct ide_atapi_pc pc; u8 *page; int capacity, lba_capacity; u16 transfer_rate, sector_size, cyls, rpm; u8 heads, sectors; - ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); + ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); - if (ide_queue_pc_tail(drive, disk, &pc)) { + if (ide_queue_pc_tail(drive, disk, pc)) { printk(KERN_ERR PFX "Can't get flexible disk page params\n"); return 1; } - if (pc.buf[3] & 0x80) + if (pc->buf[3] & 0x80) drive->dev_flags |= IDE_DFLAG_WP; else drive->dev_flags &= ~IDE_DFLAG_WP; set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP)); - page = &pc.buf[8]; + page = &pc->buf[8]; - transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); - sector_size = be16_to_cpup((__be16 *)&pc.buf[8 + 6]); - cyls = be16_to_cpup((__be16 *)&pc.buf[8 + 8]); - rpm = be16_to_cpup((__be16 *)&pc.buf[8 + 28]); - heads = pc.buf[8 + 4]; - sectors = pc.buf[8 + 5]; + transfer_rate = be16_to_cpup((__be16 *)&pc->buf[8 + 2]); + sector_size = be16_to_cpup((__be16 *)&pc->buf[8 + 6]); + cyls = be16_to_cpup((__be16 *)&pc->buf[8 + 8]); + rpm = be16_to_cpup((__be16 *)&pc->buf[8 + 28]); + heads = pc->buf[8 + 4]; + sectors = pc->buf[8 + 5]; capacity = cyls * heads * sectors * sector_size; @@ -499,7 +499,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) /* Clik! disk does not support get_flexible_disk_page */ if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) - (void) ide_floppy_get_flexible_disk_page(drive); + (void) ide_floppy_get_flexible_disk_page(drive, &pc); return rc; } diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c index 2bc51ff73fee..8f8be8546038 100644 --- a/drivers/ide/ide-floppy_ioctl.c +++ b/drivers/ide/ide-floppy_ioctl.c @@ -31,10 +31,11 @@ * On exit we set nformats to the number of records we've actually initialized. */ -static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) +static int ide_floppy_get_format_capacities(ide_drive_t *drive, + struct ide_atapi_pc *pc, + int __user *arg) { struct ide_disk_obj *floppy = drive->driver_data; - struct ide_atapi_pc pc; u8 header_len, desc_cnt; int i, blocks, length, u_array_size, u_index; int __user *argp; @@ -45,13 +46,13 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) if (u_array_size <= 0) return -EINVAL; - ide_floppy_create_read_capacity_cmd(&pc); - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) { + ide_floppy_create_read_capacity_cmd(pc); + if (ide_queue_pc_tail(drive, floppy->disk, pc)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return -EIO; } - header_len = pc.buf[3]; + header_len = pc->buf[3]; desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ u_index = 0; @@ -68,8 +69,8 @@ static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) if (u_index >= u_array_size) break; /* User-supplied buffer too small */ - blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); - length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); + blocks = be32_to_cpup((__be32 *)&pc->buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc->buf[desc_start + 6]); if (put_user(blocks, argp)) return -EFAULT; @@ -111,29 +112,28 @@ static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b, pc->flags |= PC_FLAG_WRITING; } -static int ide_floppy_get_sfrp_bit(ide_drive_t *drive) +static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc) { struct ide_disk_obj *floppy = drive->driver_data; - struct ide_atapi_pc pc; drive->atapi_flags &= ~IDE_AFLAG_SRFP; - ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE); - pc.flags |= PC_FLAG_SUPPRESS_ERROR; + ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE); + pc->flags |= PC_FLAG_SUPPRESS_ERROR; - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + if (ide_queue_pc_tail(drive, floppy->disk, pc)) return 1; - if (pc.buf[8 + 2] & 0x40) + if (pc->buf[8 + 2] & 0x40) drive->atapi_flags |= IDE_AFLAG_SRFP; return 0; } -static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg) +static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc, + int __user *arg) { struct ide_disk_obj *floppy = drive->driver_data; - struct ide_atapi_pc pc; int blocks, length, flags, err = 0; if (floppy->openers > 1) { @@ -166,10 +166,10 @@ static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg) goto out; } - (void)ide_floppy_get_sfrp_bit(drive); - ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags); + ide_floppy_get_sfrp_bit(drive, pc); + ide_floppy_create_format_unit_cmd(pc, blocks, length, flags); - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + if (ide_queue_pc_tail(drive, floppy->disk, pc)) err = -EIO; out: @@ -188,15 +188,16 @@ out: * the dsc bit, and return either 0 or 65536. */ -static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg) +static int ide_floppy_get_format_progress(ide_drive_t *drive, + struct ide_atapi_pc *pc, + int __user *arg) { struct ide_disk_obj *floppy = drive->driver_data; - struct ide_atapi_pc pc; int progress_indication = 0x10000; if (drive->atapi_flags & IDE_AFLAG_SRFP) { - ide_create_request_sense_cmd(drive, &pc); - if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + ide_create_request_sense_cmd(drive, pc); + if (ide_queue_pc_tail(drive, floppy->disk, pc)) return -EIO; if (floppy->sense_key == 2 && @@ -241,20 +242,21 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc, return 0; } -static int ide_floppy_format_ioctl(ide_drive_t *drive, fmode_t mode, - unsigned int cmd, void __user *argp) +static int ide_floppy_format_ioctl(ide_drive_t *drive, struct ide_atapi_pc *pc, + fmode_t mode, unsigned int cmd, + void __user *argp) { switch (cmd) { case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: return 0; case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: - return ide_floppy_get_format_capacities(drive, argp); + return ide_floppy_get_format_capacities(drive, pc, argp); case IDEFLOPPY_IOCTL_FORMAT_START: if (!(mode & FMODE_WRITE)) return -EPERM; - return ide_floppy_format_unit(drive, (int __user *)argp); + return ide_floppy_format_unit(drive, pc, (int __user *)argp); case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: - return ide_floppy_get_format_progress(drive, argp); + return ide_floppy_get_format_progress(drive, pc, argp); default: return -ENOTTY; } @@ -270,7 +272,7 @@ int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev, if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) return ide_floppy_lockdoor(drive, &pc, arg, cmd); - err = ide_floppy_format_ioctl(drive, mode, cmd, argp); + err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp); if (err != -ENOTTY) return err; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index ecacc008fdaf..1c36a8e83d36 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -426,9 +426,6 @@ void ide_map_sg(ide_drive_t *drive, struct request *rq) ide_hwif_t *hwif = drive->hwif; struct scatterlist *sg = hwif->sg_table; - if (hwif->sg_mapped) /* needed by ide-scsi */ - return; - if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) { hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } else { @@ -667,85 +664,10 @@ void ide_stall_queue (ide_drive_t *drive, unsigned long timeout) drive->sleep = timeout + jiffies; drive->dev_flags |= IDE_DFLAG_SLEEPING; } - EXPORT_SYMBOL(ide_stall_queue); -#define WAKEUP(drive) ((drive)->service_start + 2 * (drive)->service_time) - -/** - * choose_drive - select a drive to service - * @hwgroup: hardware group to select on - * - * choose_drive() selects the next drive which will be serviced. - * This is necessary because the IDE layer can't issue commands - * to both drives on the same cable, unlike SCSI. - */ - -static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup) -{ - ide_drive_t *drive, *best; - -repeat: - best = NULL; - drive = hwgroup->drive; - - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. - * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(drive->queue)) { - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(drive->queue); - return drive; - } - - do { - u8 dev_s = !!(drive->dev_flags & IDE_DFLAG_SLEEPING); - u8 best_s = (best && !!(best->dev_flags & IDE_DFLAG_SLEEPING)); - - if ((dev_s == 0 || time_after_eq(jiffies, drive->sleep)) && - !elv_queue_empty(drive->queue)) { - if (best == NULL || - (dev_s && (best_s == 0 || time_before(drive->sleep, best->sleep))) || - (best_s == 0 && time_before(WAKEUP(drive), WAKEUP(best)))) { - if (!blk_queue_plugged(drive->queue)) - best = drive; - } - } - } while ((drive = drive->next) != hwgroup->drive); - - if (best && (best->dev_flags & IDE_DFLAG_NICE1) && - (best->dev_flags & IDE_DFLAG_SLEEPING) == 0 && - best != hwgroup->drive && best->service_time > WAIT_MIN_SLEEP) { - long t = (signed long)(WAKEUP(best) - jiffies); - if (t >= WAIT_MIN_SLEEP) { - /* - * We *may* have some time to spare, but first let's see if - * someone can potentially benefit from our nice mood today.. - */ - drive = best->next; - do { - if ((drive->dev_flags & IDE_DFLAG_SLEEPING) == 0 - && time_before(jiffies - best->service_time, WAKEUP(drive)) - && time_before(WAKEUP(drive), jiffies + t)) - { - ide_stall_queue(best, min_t(long, t, 10 * WAIT_MIN_SLEEP)); - goto repeat; - } - } while ((drive = drive->next) != best); - } - } - return best; -} - /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&hwgroup->lock, ..); * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -757,8 +679,7 @@ repeat: * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses a per-hwgroup spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses a per-hwgroup lock to protect the hwgroup->busy flag. * * The first thread into the driver for a particular hwgroup sets the * hwgroup->busy flag to indicate that this hwgroup is now active, @@ -778,69 +699,41 @@ repeat: * the driver. This makes the driver much more friendlier to shared IRQs * than previous designs, while remaining 100% (?) SMP safe and capable. */ -static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) +void do_ide_request(struct request_queue *q) { - ide_drive_t *drive; - ide_hwif_t *hwif; + ide_drive_t *drive = q->queuedata; + ide_hwif_t *hwif = drive->hwif; + ide_hwgroup_t *hwgroup = hwif->hwgroup; struct request *rq; ide_startstop_t startstop; - int loops = 0; - - /* caller must own hwgroup->lock */ - BUG_ON(!irqs_disabled()); - - while (!hwgroup->busy) { - hwgroup->busy = 1; - /* for atari only */ - ide_get_lock(ide_intr, hwgroup); - drive = choose_drive(hwgroup); - if (drive == NULL) { - int sleeping = 0; - unsigned long sleep = 0; /* shut up, gcc */ - hwgroup->rq = NULL; - drive = hwgroup->drive; - do { - if ((drive->dev_flags & IDE_DFLAG_SLEEPING) && - (sleeping == 0 || - time_before(drive->sleep, sleep))) { - sleeping = 1; - sleep = drive->sleep; - } - } while ((drive = drive->next) != hwgroup->drive); - if (sleeping) { + + /* + * drive is doing pre-flush, ordered write, post-flush sequence. even + * though that is 3 requests, it must be seen as a single transaction. + * we must not preempt this drive until that is complete + */ + if (blk_queue_flushing(q)) /* - * Take a short snooze, and then wake up this hwgroup again. - * This gives other hwgroups on the same a chance to - * play fairly with us, just in case there are big differences - * in relative throughputs.. don't want to hog the cpu too much. + * small race where queue could get replugged during + * the 3-request flush cycle, just yank the plug since + * we want it to finish asap */ - if (time_before(sleep, jiffies + WAIT_MIN_SLEEP)) - sleep = jiffies + WAIT_MIN_SLEEP; -#if 1 - if (timer_pending(&hwgroup->timer)) - printk(KERN_CRIT "ide_set_handler: timer already active\n"); -#endif - /* so that ide_timer_expiry knows what to do */ - hwgroup->sleeping = 1; - hwgroup->req_gen_timer = hwgroup->req_gen; - mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 - * while sleeping */ - } else { - /* Ugly, but how can we sleep for the lock - * otherwise? perhaps from tq_disk? - */ + blk_remove_plug(q); - /* for atari only */ - ide_release_lock(); - hwgroup->busy = 0; - } + spin_unlock_irq(q->queue_lock); + spin_lock_irq(&hwgroup->lock); + + if (!ide_lock_hwgroup(hwgroup)) { +repeat: + hwgroup->rq = NULL; - /* no more work for this hwgroup (for now) */ - return; + if (drive->dev_flags & IDE_DFLAG_SLEEPING) { + if (time_before(drive->sleep, jiffies)) { + ide_unlock_hwgroup(hwgroup); + goto plug_device; + } } - again: - hwif = HWIF(drive); + if (hwif != hwgroup->hwif) { /* * set nIEN for previous hwif, drives in the @@ -852,16 +745,20 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) hwgroup->hwif = hwif; hwgroup->drive = drive; drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED); - drive->service_start = jiffies; + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); /* * we know that the queue isn't empty, but this can happen * if the q->prep_rq_fn() decides to kill a request */ rq = elv_next_request(drive->queue); + spin_unlock_irq(q->queue_lock); + spin_lock_irq(&hwgroup->lock); + if (!rq) { - hwgroup->busy = 0; - break; + ide_unlock_hwgroup(hwgroup); + goto out; } /* @@ -876,53 +773,36 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq) * though. I hope that doesn't happen too much, hopefully not * unless the subdriver triggers such a thing in its own PM * state machine. - * - * We count how many times we loop here to make sure we service - * all drives in the hwgroup without looping for ever */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && blk_pm_request(rq) == 0 && (rq->cmd_flags & REQ_PREEMPT) == 0) { - drive = drive->next ? drive->next : hwgroup->drive; - if (loops++ < 4 && !blk_queue_plugged(drive->queue)) - goto again; - /* We clear busy, there should be no pending ATA command at this point. */ - hwgroup->busy = 0; - break; + /* there should be no pending command at this point */ + ide_unlock_hwgroup(hwgroup); + goto plug_device; } hwgroup->rq = rq; - /* - * Some systems have trouble with IDE IRQs arriving while - * the driver is still setting things up. So, here we disable - * the IRQ used by this interface while the request is being started. - * This may look bad at first, but pretty much the same thing - * happens anyway when any interrupt comes in, IDE or otherwise - * -- the kernel masks the IRQ while it is being handled. - */ - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) - disable_irq_nosync(hwif->irq); - spin_unlock(&hwgroup->lock); - local_irq_enable_in_hardirq(); - /* allow other IRQs while we start this request */ + spin_unlock_irq(&hwgroup->lock); startstop = start_request(drive, rq); spin_lock_irq(&hwgroup->lock); - if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) - enable_irq(hwif->irq); + if (startstop == ide_stopped) - hwgroup->busy = 0; - } -} + goto repeat; + } else + goto plug_device; +out: + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); + return; -/* - * Passes the stuff to ide_do_request - */ -void do_ide_request(struct request_queue *q) -{ - ide_drive_t *drive = q->queuedata; +plug_device: + spin_unlock_irq(&hwgroup->lock); + spin_lock_irq(q->queue_lock); - ide_do_request(HWGROUP(drive), IDE_NO_IRQ); + if (!elv_queue_empty(q)) + blk_plug_device(q); } /* @@ -983,6 +863,17 @@ out: return ret; } +static void ide_plug_device(ide_drive_t *drive) +{ + struct request_queue *q = drive->queue; + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (!elv_queue_empty(q)) + blk_plug_device(q); + spin_unlock_irqrestore(q->queue_lock, flags); +} + /** * ide_timer_expiry - handle lack of an IDE interrupt * @data: timer callback magic (hwgroup) @@ -1000,10 +891,12 @@ out: void ide_timer_expiry (unsigned long data) { ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data; + ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; ide_expiry_t *expiry; unsigned long flags; unsigned long wait = -1; + int plug_device = 0; spin_lock_irqsave(&hwgroup->lock, flags); @@ -1015,22 +908,15 @@ void ide_timer_expiry (unsigned long data) * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. */ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } } else { - ide_drive_t *drive = hwgroup->drive; + drive = hwgroup->drive; if (!drive) { printk(KERN_ERR "ide_timer_expiry: hwgroup->drive was NULL\n"); hwgroup->handler = NULL; } else { ide_hwif_t *hwif; ide_startstop_t startstop = ide_stopped; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk(KERN_ERR "%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) > 0) { @@ -1071,15 +957,18 @@ void ide_timer_expiry (unsigned long data) ide_error(drive, "irq timeout", hwif->tp_ops->read_status(hwif)); } - drive->service_time = jiffies - drive->service_start; spin_lock_irq(&hwgroup->lock); enable_irq(hwif->irq); - if (startstop == ide_stopped) - hwgroup->busy = 0; + if (startstop == ide_stopped) { + ide_unlock_hwgroup(hwgroup); + plug_device = 1; + } } } - ide_do_request(hwgroup, IDE_NO_IRQ); spin_unlock_irqrestore(&hwgroup->lock, flags); + + if (plug_device) + ide_plug_device(drive); } /** @@ -1173,10 +1062,11 @@ irqreturn_t ide_intr (int irq, void *dev_id) unsigned long flags; ide_hwgroup_t *hwgroup = (ide_hwgroup_t *)dev_id; ide_hwif_t *hwif = hwgroup->hwif; - ide_drive_t *drive; + ide_drive_t *uninitialized_var(drive); ide_handler_t *handler; ide_startstop_t startstop; irqreturn_t irq_ret = IRQ_NONE; + int plug_device = 0; spin_lock_irqsave(&hwgroup->lock, flags); @@ -1241,10 +1131,6 @@ irqreturn_t ide_intr (int irq, void *dev_id) */ goto out; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk(KERN_ERR "%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); - } hwgroup->handler = NULL; hwgroup->req_gen++; del_timer(&hwgroup->timer); @@ -1267,20 +1153,22 @@ irqreturn_t ide_intr (int irq, void *dev_id) * same irq as is currently being serviced here, and Linux * won't allow another of the same (on any CPU) until we return. */ - drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; - ide_do_request(hwgroup, hwif->irq); - } else { - printk(KERN_ERR "%s: ide_intr: huh? expected NULL handler " - "on exit\n", drive->name); - } + ide_unlock_hwgroup(hwgroup); + plug_device = 1; + } else + printk(KERN_ERR "%s: %s: huh? expected NULL handler " + "on exit\n", __func__, drive->name); } out_handled: irq_ret = IRQ_HANDLED; out: spin_unlock_irqrestore(&hwgroup->lock, flags); + + if (plug_device) + ide_plug_device(drive); + return irq_ret; } diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 28232c64c346..1be263eb9c07 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -95,8 +95,7 @@ static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg) return -EPERM; if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) && - (drive->media != ide_tape || - (drive->dev_flags & IDE_DFLAG_SCSI))) + (drive->media != ide_tape)) return -EPERM; if ((arg >> IDE_NICE_DSC_OVERLAP) & 1) diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 63d01c55f865..678454ac2483 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -16,16 +16,19 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) spin_lock_irq(&hwgroup->lock); if (drive->dev_flags & IDE_DFLAG_PARKED) { int reset_timer = time_before(timeout, drive->sleep); + int start_queue = 0; drive->sleep = timeout; wake_up_all(&ide_park_wq); - if (reset_timer && hwgroup->sleeping && - del_timer(&hwgroup->timer)) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; + if (reset_timer && del_timer(&hwgroup->timer)) + start_queue = 1; + spin_unlock_irq(&hwgroup->lock); + + if (start_queue) { + spin_lock_irq(q->queue_lock); blk_start_queueing(q); + spin_unlock_irq(q->queue_lock); } - spin_unlock_irq(&hwgroup->lock); return; } spin_unlock_irq(&hwgroup->lock); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index a64ec259f3d1..c5adb7b9c5b5 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -101,6 +101,82 @@ static void ide_disk_init_mult_count(ide_drive_t *drive) } } +static void ide_classify_ata_dev(ide_drive_t *drive) +{ + u16 *id = drive->id; + char *m = (char *)&id[ATA_ID_PROD]; + int is_cfa = ata_id_is_cfa(id); + + /* CF devices are *not* removable in Linux definition of the term */ + if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7))) + drive->dev_flags |= IDE_DFLAG_REMOVABLE; + + drive->media = ide_disk; + + if (!ata_id_has_unload(drive->id)) + drive->dev_flags |= IDE_DFLAG_NO_UNLOAD; + + printk(KERN_INFO "%s: %s, %s DISK drive\n", drive->name, m, + is_cfa ? "CFA" : "ATA"); +} + +static void ide_classify_atapi_dev(ide_drive_t *drive) +{ + u16 *id = drive->id; + char *m = (char *)&id[ATA_ID_PROD]; + u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f; + + printk(KERN_INFO "%s: %s, ATAPI ", drive->name, m); + switch (type) { + case ide_floppy: + if (!strstr(m, "CD-ROM")) { + if (!strstr(m, "oppy") && + !strstr(m, "poyp") && + !strstr(m, "ZIP")) + printk(KERN_CONT "cdrom or floppy?, assuming "); + if (drive->media != ide_cdrom) { + printk(KERN_CONT "FLOPPY"); + drive->dev_flags |= IDE_DFLAG_REMOVABLE; + break; + } + } + /* Early cdrom models used zero */ + type = ide_cdrom; + case ide_cdrom: + drive->dev_flags |= IDE_DFLAG_REMOVABLE; +#ifdef CONFIG_PPC + /* kludge for Apple PowerBook internal zip */ + if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) { + printk(KERN_CONT "FLOPPY"); + type = ide_floppy; + break; + } +#endif + printk(KERN_CONT "CD/DVD-ROM"); + break; + case ide_tape: + printk(KERN_CONT "TAPE"); + break; + case ide_optical: + printk(KERN_CONT "OPTICAL"); + drive->dev_flags |= IDE_DFLAG_REMOVABLE; + break; + default: + printk(KERN_CONT "UNKNOWN (type %d)", type); + break; + } + + printk(KERN_CONT " drive\n"); + drive->media = type; + /* an ATAPI device ignores DRDY */ + drive->ready_stat = 0; + if (ata_id_cdb_intr(id)) + drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; + drive->dev_flags |= IDE_DFLAG_DOORLOCKING; + /* we don't do head unloading on ATAPI devices */ + drive->dev_flags |= IDE_DFLAG_NO_UNLOAD; +} + /** * do_identify - identify a drive * @drive: drive to identify @@ -117,7 +193,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd) u16 *id = drive->id; char *m = (char *)&id[ATA_ID_PROD]; unsigned long flags; - int bswap = 1, is_cfa; + int bswap = 1; /* local CPU only; some systems need this */ local_irq_save(flags); @@ -154,91 +230,23 @@ static void do_identify(ide_drive_t *drive, u8 cmd) if (strstr(m, "E X A B Y T E N E S T")) goto err_misc; - printk(KERN_INFO "%s: %s, ", drive->name, m); - drive->dev_flags |= IDE_DFLAG_PRESENT; drive->dev_flags &= ~IDE_DFLAG_DEAD; /* * Check for an ATAPI device */ - if (cmd == ATA_CMD_ID_ATAPI) { - u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f; - - printk(KERN_CONT "ATAPI "); - switch (type) { - case ide_floppy: - if (!strstr(m, "CD-ROM")) { - if (!strstr(m, "oppy") && - !strstr(m, "poyp") && - !strstr(m, "ZIP")) - printk(KERN_CONT "cdrom or floppy?, assuming "); - if (drive->media != ide_cdrom) { - printk(KERN_CONT "FLOPPY"); - drive->dev_flags |= IDE_DFLAG_REMOVABLE; - break; - } - } - /* Early cdrom models used zero */ - type = ide_cdrom; - case ide_cdrom: - drive->dev_flags |= IDE_DFLAG_REMOVABLE; -#ifdef CONFIG_PPC - /* kludge for Apple PowerBook internal zip */ - if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) { - printk(KERN_CONT "FLOPPY"); - type = ide_floppy; - break; - } -#endif - printk(KERN_CONT "CD/DVD-ROM"); - break; - case ide_tape: - printk(KERN_CONT "TAPE"); - break; - case ide_optical: - printk(KERN_CONT "OPTICAL"); - drive->dev_flags |= IDE_DFLAG_REMOVABLE; - break; - default: - printk(KERN_CONT "UNKNOWN (type %d)", type); - break; - } - printk(KERN_CONT " drive\n"); - drive->media = type; - /* an ATAPI device ignores DRDY */ - drive->ready_stat = 0; - if (ata_id_cdb_intr(id)) - drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; - drive->dev_flags |= IDE_DFLAG_DOORLOCKING; - /* we don't do head unloading on ATAPI devices */ - drive->dev_flags |= IDE_DFLAG_NO_UNLOAD; - return; - } - + if (cmd == ATA_CMD_ID_ATAPI) + ide_classify_atapi_dev(drive); + else /* * Not an ATAPI device: looks like a "regular" hard disk */ - - is_cfa = ata_id_is_cfa(id); - - /* CF devices are *not* removable in Linux definition of the term */ - if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7))) - drive->dev_flags |= IDE_DFLAG_REMOVABLE; - - drive->media = ide_disk; - - if (!ata_id_has_unload(drive->id)) - drive->dev_flags |= IDE_DFLAG_NO_UNLOAD; - - printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA"); - + ide_classify_ata_dev(drive); return; - err_misc: kfree(id); drive->dev_flags &= ~IDE_DFLAG_PRESENT; - return; } /** @@ -641,14 +649,9 @@ static int ide_register_port(ide_hwif_t *hwif) /* register with global device tree */ dev_set_name(&hwif->gendev, hwif->name); hwif->gendev.driver_data = hwif; - if (hwif->gendev.parent == NULL) { - if (hwif->dev) - hwif->gendev.parent = hwif->dev; - else - /* Would like to do = &device_legacy */ - hwif->gendev.parent = NULL; - } + hwif->gendev.parent = hwif->dev; hwif->gendev.release = hwif_release_dev; + ret = device_register(&hwif->gendev); if (ret < 0) { printk(KERN_WARNING "IDE: %s: device_register error: %d\n", @@ -878,8 +881,7 @@ static int ide_init_queue(ide_drive_t *drive) * do not. */ - q = blk_init_queue_node(do_ide_request, &hwif->hwgroup->lock, - hwif_to_node(hwif)); + q = blk_init_queue_node(do_ide_request, NULL, hwif_to_node(hwif)); if (!q) return 1; @@ -1139,8 +1141,6 @@ static struct kobject *ata_probe(dev_t dev, int *part, void *data) if (drive->media == ide_disk) request_module("ide-disk"); - if (drive->dev_flags & IDE_DFLAG_SCSI) - request_module("ide-scsi"); if (drive->media == ide_cdrom || drive->media == ide_optical) request_module("ide-cd"); if (drive->media == ide_tape) @@ -1417,58 +1417,6 @@ static void ide_port_cable_detect(ide_hwif_t *hwif) } } -static ssize_t store_delete_devices(struct device *portdev, - struct device_attribute *attr, - const char *buf, size_t n) -{ - ide_hwif_t *hwif = dev_get_drvdata(portdev); - - if (strncmp(buf, "1", n)) - return -EINVAL; - - ide_port_unregister_devices(hwif); - - return n; -}; - -static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices); - -static ssize_t store_scan(struct device *portdev, - struct device_attribute *attr, - const char *buf, size_t n) -{ - ide_hwif_t *hwif = dev_get_drvdata(portdev); - - if (strncmp(buf, "1", n)) - return -EINVAL; - - ide_port_unregister_devices(hwif); - ide_port_scan(hwif); - - return n; -}; - -static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan); - -static struct device_attribute *ide_port_attrs[] = { - &dev_attr_delete_devices, - &dev_attr_scan, - NULL -}; - -static int ide_sysfs_register_port(ide_hwif_t *hwif) -{ - int i, uninitialized_var(rc); - - for (i = 0; ide_port_attrs[i]; i++) { - rc = device_create_file(hwif->portdev, ide_port_attrs[i]); - if (rc) - break; - } - - return rc; -} - static unsigned int ide_indexes; /** @@ -1655,9 +1603,6 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, if (hwif == NULL) continue; - if (hwif->chipset == ide_unknown) - hwif->chipset = ide_generic; - if (hwif->present) hwif_register_devices(hwif); } diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c new file mode 100644 index 000000000000..883ffacaf45a --- /dev/null +++ b/drivers/ide/ide-sysfs.c @@ -0,0 +1,125 @@ +#include <linux/kernel.h> +#include <linux/ide.h> + +char *ide_media_string(ide_drive_t *drive) +{ + switch (drive->media) { + case ide_disk: + return "disk"; + case ide_cdrom: + return "cdrom"; + case ide_tape: + return "tape"; + case ide_floppy: + return "floppy"; + case ide_optical: + return "optical"; + default: + return "UNKNOWN"; + } +} + +static ssize_t media_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "%s\n", ide_media_string(drive)); +} + +static ssize_t drivename_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "%s\n", drive->name); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "ide:m-%s\n", ide_media_string(drive)); +} + +static ssize_t model_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]); +} + +static ssize_t firmware_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]); +} + +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + ide_drive_t *drive = to_ide_device(dev); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]); +} + +struct device_attribute ide_dev_attrs[] = { + __ATTR_RO(media), + __ATTR_RO(drivename), + __ATTR_RO(modalias), + __ATTR_RO(model), + __ATTR_RO(firmware), + __ATTR(serial, 0400, serial_show, NULL), + __ATTR(unload_heads, 0644, ide_park_show, ide_park_store), + __ATTR_NULL +}; + +static ssize_t store_delete_devices(struct device *portdev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + ide_hwif_t *hwif = dev_get_drvdata(portdev); + + if (strncmp(buf, "1", n)) + return -EINVAL; + + ide_port_unregister_devices(hwif); + + return n; +}; + +static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices); + +static ssize_t store_scan(struct device *portdev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + ide_hwif_t *hwif = dev_get_drvdata(portdev); + + if (strncmp(buf, "1", n)) + return -EINVAL; + + ide_port_unregister_devices(hwif); + ide_port_scan(hwif); + + return n; +}; + +static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan); + +static struct device_attribute *ide_port_attrs[] = { + &dev_attr_delete_devices, + &dev_attr_scan, + NULL +}; + +int ide_sysfs_register_port(ide_hwif_t *hwif) +{ + int i, uninitialized_var(rc); + + for (i = 0; ide_port_attrs[i]; i++) { + rc = device_create_file(hwif->portdev, ide_port_attrs[i]); + if (rc) + break; + } + + return rc; +} diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index a2d470eb2b55..5d2aa22cd6e4 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -694,7 +694,7 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, pc->retries++; - return ide_issue_pc(drive, WAIT_TAPE_CMD, NULL); + return ide_issue_pc(drive); } /* A mode sense command is used to "sense" tape parameters. */ diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index f0f09f702e9c..46a2d4ca812b 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -440,81 +440,13 @@ static int ide_bus_match(struct device *dev, struct device_driver *drv) return 1; } -static char *media_string(ide_drive_t *drive) -{ - switch (drive->media) { - case ide_disk: - return "disk"; - case ide_cdrom: - return "cdrom"; - case ide_tape: - return "tape"; - case ide_floppy: - return "floppy"; - case ide_optical: - return "optical"; - default: - return "UNKNOWN"; - } -} - -static ssize_t media_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", media_string(drive)); -} - -static ssize_t drivename_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", drive->name); -} - -static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "ide:m-%s\n", media_string(drive)); -} - -static ssize_t model_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]); -} - -static ssize_t firmware_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]); -} - -static ssize_t serial_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]); -} - -static struct device_attribute ide_dev_attrs[] = { - __ATTR_RO(media), - __ATTR_RO(drivename), - __ATTR_RO(modalias), - __ATTR_RO(model), - __ATTR_RO(firmware), - __ATTR(serial, 0400, serial_show, NULL), - __ATTR(unload_heads, 0644, ide_park_show, ide_park_store), - __ATTR_NULL -}; - static int ide_uevent(struct device *dev, struct kobj_uevent_env *env) { ide_drive_t *drive = to_ide_device(dev); - add_uevent_var(env, "MEDIA=%s", media_string(drive)); + add_uevent_var(env, "MEDIA=%s", ide_media_string(drive)); add_uevent_var(env, "DRIVENAME=%s", drive->name); - add_uevent_var(env, "MODALIAS=ide:m-%s", media_string(drive)); + add_uevent_var(env, "MODALIAS=ide:m-%s", ide_media_string(drive)); return 0; } diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index 13b63e7fa353..b4ef218072cd 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -216,16 +216,17 @@ static const struct ide_tp_ops tx4938ide_tp_ops = { #endif /* __BIG_ENDIAN */ static const struct ide_port_ops tx4938ide_port_ops = { - .set_pio_mode = tx4938ide_set_pio_mode, + .set_pio_mode = tx4938ide_set_pio_mode, }; static const struct ide_port_info tx4938ide_port_info __initdata = { - .port_ops = &tx4938ide_port_ops, + .port_ops = &tx4938ide_port_ops, #ifdef __BIG_ENDIAN - .tp_ops = &tx4938ide_tp_ops, + .tp_ops = &tx4938ide_tp_ops, #endif - .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, - .pio_mask = ATA_PIO5, + .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, + .pio_mask = ATA_PIO5, + .chipset = ide_generic, }; static int __init tx4938ide_probe(struct platform_device *pdev) diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c index 97cd9e0f66f6..4a8c5a21bd4c 100644 --- a/drivers/ide/tx4939ide.c +++ b/drivers/ide/tx4939ide.c @@ -623,33 +623,34 @@ static const struct ide_tp_ops tx4939ide_tp_ops = { #endif /* __LITTLE_ENDIAN */ static const struct ide_port_ops tx4939ide_port_ops = { - .set_pio_mode = tx4939ide_set_pio_mode, - .set_dma_mode = tx4939ide_set_dma_mode, - .clear_irq = tx4939ide_clear_irq, - .cable_detect = tx4939ide_cable_detect, + .set_pio_mode = tx4939ide_set_pio_mode, + .set_dma_mode = tx4939ide_set_dma_mode, + .clear_irq = tx4939ide_clear_irq, + .cable_detect = tx4939ide_cable_detect, }; static const struct ide_dma_ops tx4939ide_dma_ops = { - .dma_host_set = tx4939ide_dma_host_set, - .dma_setup = tx4939ide_dma_setup, - .dma_exec_cmd = ide_dma_exec_cmd, - .dma_start = ide_dma_start, - .dma_end = tx4939ide_dma_end, - .dma_test_irq = tx4939ide_dma_test_irq, - .dma_lost_irq = ide_dma_lost_irq, - .dma_timeout = ide_dma_timeout, + .dma_host_set = tx4939ide_dma_host_set, + .dma_setup = tx4939ide_dma_setup, + .dma_exec_cmd = ide_dma_exec_cmd, + .dma_start = ide_dma_start, + .dma_end = tx4939ide_dma_end, + .dma_test_irq = tx4939ide_dma_test_irq, + .dma_lost_irq = ide_dma_lost_irq, + .dma_timeout = ide_dma_timeout, }; static const struct ide_port_info tx4939ide_port_info __initdata = { - .init_hwif = tx4939ide_init_hwif, - .init_dma = tx4939ide_init_dma, - .port_ops = &tx4939ide_port_ops, - .dma_ops = &tx4939ide_dma_ops, - .tp_ops = &tx4939ide_tp_ops, - .host_flags = IDE_HFLAG_MMIO, - .pio_mask = ATA_PIO4, - .mwdma_mask = ATA_MWDMA2, - .udma_mask = ATA_UDMA5, + .init_hwif = tx4939ide_init_hwif, + .init_dma = tx4939ide_init_dma, + .port_ops = &tx4939ide_port_ops, + .dma_ops = &tx4939ide_dma_ops, + .tp_ops = &tx4939ide_tp_ops, + .host_flags = IDE_HFLAG_MMIO, + .pio_mask = ATA_PIO4, + .mwdma_mask = ATA_MWDMA2, + .udma_mask = ATA_UDMA5, + .chipset = ide_generic, }; static int __init tx4939ide_probe(struct platform_device *pdev) diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index a1039068f95c..415fab0125ac 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -222,11 +222,16 @@ bool check_syscall_vector(struct lguest *lg) int init_interrupts(void) { /* If they want some strange system call vector, reserve it now */ - if (syscall_vector != SYSCALL_VECTOR - && test_and_set_bit(syscall_vector, used_vectors)) { - printk("lg: couldn't reserve syscall %u\n", syscall_vector); - return -EBUSY; + if (syscall_vector != SYSCALL_VECTOR) { + if (test_bit(syscall_vector, used_vectors) || + vector_used_by_percpu_irq(syscall_vector)) { + printk(KERN_ERR "lg: couldn't reserve syscall %u\n", + syscall_vector); + return -EBUSY; + } + set_bit(syscall_vector, used_vectors); } + return 0; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index c7630a228310..ba0bd3d5775b 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -815,19 +815,20 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) if (err) goto out_free; + err = -ENOMEM; ubi->peb_buf1 = vmalloc(ubi->peb_size); if (!ubi->peb_buf1) goto out_free; ubi->peb_buf2 = vmalloc(ubi->peb_size); if (!ubi->peb_buf2) - goto out_free; + goto out_free; #ifdef CONFIG_MTD_UBI_DEBUG mutex_init(&ubi->dbg_buf_mutex); ubi->dbg_peb_buf = vmalloc(ubi->peb_size); if (!ubi->dbg_peb_buf) - goto out_free; + goto out_free; #endif err = attach_by_scanning(ubi); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index b30a0b83d7f1..98cf31ed0814 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -721,7 +721,8 @@ static int rename_volumes(struct ubi_device *ubi, * It seems we need to remove volume with name @re->new_name, * if it exists. */ - desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, UBI_EXCLUSIVE); + desc = ubi_open_volume_nm(ubi->ubi_num, re->new_name, + UBI_EXCLUSIVE); if (IS_ERR(desc)) { err = PTR_ERR(desc); if (err == -ENODEV) diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 78e914d23ece..13777e5beac9 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -27,11 +27,11 @@ #define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) #define ubi_assert(expr) do { \ - if (unlikely(!(expr))) { \ - printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ - __func__, __LINE__, current->pid); \ - ubi_dbg_dump_stack(); \ - } \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + ubi_dbg_dump_stack(); \ + } \ } while (0) #define dbg_msg(fmt, ...) \ diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index d8966bae0e0b..048a606cebde 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -504,12 +504,9 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, if (!vid_hdr) return -ENOMEM; - mutex_lock(&ubi->buf_mutex); - retry: new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); if (new_pnum < 0) { - mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); return new_pnum; } @@ -529,20 +526,23 @@ retry: goto write_error; data_size = offset + len; + mutex_lock(&ubi->buf_mutex); memset(ubi->peb_buf1 + offset, 0xFF, len); /* Read everything before the area where the write failure happened */ if (offset > 0) { err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); if (err && err != UBI_IO_BITFLIPS) - goto out_put; + goto out_unlock; } memcpy(ubi->peb_buf1 + offset, buf, len); err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); - if (err) + if (err) { + mutex_unlock(&ubi->buf_mutex); goto write_error; + } mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); @@ -553,8 +553,9 @@ retry: ubi_msg("data was successfully recovered"); return 0; -out_put: +out_unlock: mutex_unlock(&ubi->buf_mutex); +out_put: ubi_wl_put_peb(ubi, new_pnum, 1); ubi_free_vid_hdr(ubi, vid_hdr); return err; @@ -567,7 +568,6 @@ write_error: ubi_warn("failed to write to PEB %d", new_pnum); ubi_wl_put_peb(ubi, new_pnum, 1); if (++tries > UBI_IO_RETRIES) { - mutex_unlock(&ubi->buf_mutex); ubi_free_vid_hdr(ubi, vid_hdr); return err; } @@ -949,10 +949,14 @@ write_error: * This function copies logical eraseblock from physical eraseblock @from to * physical eraseblock @to. The @vid_hdr buffer may be changed by this * function. Returns: - * o %0 in case of success; - * o %1 if the operation was canceled and should be tried later (e.g., - * because a bit-flip was detected at the target PEB); - * o %2 if the volume is being deleted and this LEB should not be moved. + * o %0 in case of success; + * o %1 if the operation was canceled because the volume is being deleted + * or because the PEB was put meanwhile; + * o %2 if the operation was canceled because there was a write error to the + * target PEB; + * o %-EAGAIN if the operation was canceled because a bit-flip was detected + * in the target PEB; + * o a negative error code in case of failure. */ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_hdr *vid_hdr) @@ -978,7 +982,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * Note, we may race with volume deletion, which means that the volume * this logical eraseblock belongs to might be being deleted. Since the - * volume deletion unmaps all the volume's logical eraseblocks, it will + * volume deletion un-maps all the volume's logical eraseblocks, it will * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. */ vol = ubi->volumes[idx]; @@ -986,7 +990,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* No need to do further work, cancel */ dbg_eba("volume %d is being removed, cancel", vol_id); spin_unlock(&ubi->volumes_lock); - return 2; + return 1; } spin_unlock(&ubi->volumes_lock); @@ -1023,7 +1027,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, /* * OK, now the LEB is locked and we can safely start moving it. Since - * this function utilizes thie @ubi->peb1_buf buffer which is shared + * this function utilizes the @ubi->peb1_buf buffer which is shared * with some other functions, so lock the buffer by taking the * @ubi->buf_mutex. */ @@ -1068,8 +1072,11 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); - if (err) + if (err) { + if (err == -EIO) + err = 2; goto out_unlock_buf; + } cond_resched(); @@ -1079,14 +1086,17 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, if (err != UBI_IO_BITFLIPS) ubi_warn("cannot read VID header back from PEB %d", to); else - err = 1; + err = -EAGAIN; goto out_unlock_buf; } if (data_size > 0) { err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); - if (err) + if (err) { + if (err == -EIO) + err = 2; goto out_unlock_buf; + } cond_resched(); @@ -1101,15 +1111,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, ubi_warn("cannot read data back from PEB %d", to); else - err = 1; + err = -EAGAIN; goto out_unlock_buf; } cond_resched(); if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { - ubi_warn("read data back from PEB %d - it is different", - to); + ubi_warn("read data back from PEB %d and it is " + "different", to); + err = -EINVAL; goto out_unlock_buf; } } diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 2fb64be44f1b..a74118c05745 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -637,8 +637,6 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, dbg_io("read EC header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - if (UBI_IO_DEBUG) - verbose = 1; err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); if (err) { @@ -685,6 +683,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, if (verbose) ubi_warn("no EC header found at PEB %d, " "only 0xFF bytes", pnum); + else if (UBI_IO_DEBUG) + dbg_msg("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); return UBI_IO_PEB_EMPTY; } @@ -696,7 +697,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, ubi_warn("bad magic number at PEB %d: %08x instead of " "%08x", pnum, magic, UBI_EC_HDR_MAGIC); ubi_dbg_dump_ec_hdr(ec_hdr); - } + } else if (UBI_IO_DEBUG) + dbg_msg("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); return UBI_IO_BAD_EC_HDR; } @@ -708,7 +711,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, ubi_warn("bad EC header CRC at PEB %d, calculated " "%#08x, read %#08x", pnum, crc, hdr_crc); ubi_dbg_dump_ec_hdr(ec_hdr); - } + } else if (UBI_IO_DEBUG) + dbg_msg("bad EC header CRC at PEB %d, calculated " + "%#08x, read %#08x", pnum, crc, hdr_crc); return UBI_IO_BAD_EC_HDR; } @@ -912,8 +917,6 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, dbg_io("read VID header from PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - if (UBI_IO_DEBUG) - verbose = 1; p = (char *)vid_hdr - ubi->vid_hdr_shift; err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, @@ -960,6 +963,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, if (verbose) ubi_warn("no VID header found at PEB %d, " "only 0xFF bytes", pnum); + else if (UBI_IO_DEBUG) + dbg_msg("no VID header found at PEB %d, " + "only 0xFF bytes", pnum); return UBI_IO_PEB_FREE; } @@ -971,7 +977,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, ubi_warn("bad magic number at PEB %d: %08x instead of " "%08x", pnum, magic, UBI_VID_HDR_MAGIC); ubi_dbg_dump_vid_hdr(vid_hdr); - } + } else if (UBI_IO_DEBUG) + dbg_msg("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); return UBI_IO_BAD_VID_HDR; } @@ -983,7 +991,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, ubi_warn("bad CRC at PEB %d, calculated %#08x, " "read %#08x", pnum, crc, hdr_crc); ubi_dbg_dump_vid_hdr(vid_hdr); - } + } else if (UBI_IO_DEBUG) + dbg_msg("bad CRC at PEB %d, calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); return UBI_IO_BAD_VID_HDR; } @@ -1024,7 +1034,7 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, err = paranoid_check_peb_ec_hdr(ubi, pnum); if (err) - return err > 0 ? -EINVAL: err; + return err > 0 ? -EINVAL : err; vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index 1c3fa18c26a7..4a8ec485c91d 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -74,6 +74,13 @@ #define UBI_IO_RETRIES 3 /* + * Length of the protection queue. The length is effectively equivalent to the + * number of (global) erase cycles PEBs are protected from the wear-leveling + * worker. + */ +#define UBI_PROT_QUEUE_LEN 10 + +/* * Error codes returned by the I/O sub-system. * * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only @@ -95,7 +102,8 @@ enum { /** * struct ubi_wl_entry - wear-leveling entry. - * @rb: link in the corresponding RB-tree + * @u.rb: link in the corresponding (free/used) RB-tree + * @u.list: link in the protection queue * @ec: erase counter * @pnum: physical eraseblock number * @@ -104,7 +112,10 @@ enum { * RB-trees. See WL sub-system for details. */ struct ubi_wl_entry { - struct rb_node rb; + union { + struct rb_node rb; + struct list_head list; + } u; int ec; int pnum; }; @@ -288,7 +299,7 @@ struct ubi_wl_entry; * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling * * @autoresize_vol_id: ID of the volume which has to be auto-resized at the end - * of UBI ititializetion + * of UBI initialization * @vtbl_slots: how many slots are available in the volume table * @vtbl_size: size of the volume table in bytes * @vtbl: in-RAM volume table copy @@ -306,18 +317,17 @@ struct ubi_wl_entry; * @used: RB-tree of used physical eraseblocks * @free: RB-tree of free physical eraseblocks * @scrub: RB-tree of physical eraseblocks which need scrubbing - * @prot: protection trees - * @prot.pnum: protection tree indexed by physical eraseblock numbers - * @prot.aec: protection tree indexed by absolute erase counter value - * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, - * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works - * fields + * @pq: protection queue (contain physical eraseblocks which are temporarily + * protected from the wear-leveling worker) + * @pq_head: protection queue head + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, + * @move_to, @move_to_put @erase_pending, @wl_scheduled and @works + * fields * @move_mutex: serializes eraseblock moves - * @work_sem: sycnhronizes the WL worker with use tasks + * @work_sem: synchronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any * physical eraseblock - * @abs_ec: absolute erase counter * @move_from: physical eraseblock from where the data is being moved * @move_to: physical eraseblock where the data is being moved to * @move_to_put: if the "to" PEB was put @@ -351,11 +361,11 @@ struct ubi_wl_entry; * * @peb_buf1: a buffer of PEB size used for different purposes * @peb_buf2: another buffer of PEB size used for different purposes - * @buf_mutex: proptects @peb_buf1 and @peb_buf2 + * @buf_mutex: protects @peb_buf1 and @peb_buf2 * @ckvol_mutex: serializes static volume checking when opening - * @mult_mutex: serializes operations on multiple volumes, like re-nameing + * @mult_mutex: serializes operations on multiple volumes, like re-naming * @dbg_peb_buf: buffer of PEB size used for debugging - * @dbg_buf_mutex: proptects @dbg_peb_buf + * @dbg_buf_mutex: protects @dbg_peb_buf */ struct ubi_device { struct cdev cdev; @@ -392,16 +402,13 @@ struct ubi_device { struct rb_root used; struct rb_root free; struct rb_root scrub; - struct { - struct rb_root pnum; - struct rb_root aec; - } prot; + struct list_head pq[UBI_PROT_QUEUE_LEN]; + int pq_head; spinlock_t wl_lock; struct mutex move_mutex; struct rw_semaphore work_sem; int wl_scheduled; struct ubi_wl_entry **lookuptbl; - unsigned long long abs_ec; struct ubi_wl_entry *move_from; struct ubi_wl_entry *move_to; int move_to_put; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index dcb6dac1dc54..14901cb82c18 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -22,7 +22,7 @@ * UBI wear-leveling sub-system. * * This sub-system is responsible for wear-leveling. It works in terms of - * physical* eraseblocks and erase counters and knows nothing about logical + * physical eraseblocks and erase counters and knows nothing about logical * eraseblocks, volumes, etc. From this sub-system's perspective all physical * eraseblocks are of two types - used and free. Used physical eraseblocks are * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical @@ -55,8 +55,39 @@ * * As it was said, for the UBI sub-system all physical eraseblocks are either * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while - * used eraseblocks are kept in a set of different RB-trees: @wl->used, - * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or + * (temporarily) in the @wl->pq queue. + * + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is a protection queue in between where this + * physical eraseblock is temporarily stored (@wl->pq). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait; this is + * especially important in case of "short term" physical eraseblocks. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * protection queue. Eraseblocks are put to the tail of this queue when they + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the + * head of the queue on each erase operation (for any eraseblock). So the + * length of the queue defines how may (global) erase cycles PEBs are protected. + * + * To put it differently, each physical eraseblock has 2 main states: free and + * used. The former state corresponds to the @wl->free tree. The latter state + * is split up on several sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is temporarily prohibited (@wl->pq queue); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those structures. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good @@ -70,9 +101,6 @@ * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a * room for future re-works of the WL sub-system. - * - * Note: the stuff with protection trees looks too complex and is difficult to - * understand. Should be fixed. */ #include <linux/slab.h> @@ -85,14 +113,6 @@ #define WL_RESERVED_PEBS 1 /* - * How many erase cycles are short term, unknown, and long term physical - * eraseblocks protected. - */ -#define ST_PROTECTION 16 -#define U_PROTECTION 10 -#define LT_PROTECTION 4 - -/* * Maximum difference between two erase counters. If this threshold is * exceeded, the WL sub-system starts moving data from used physical * eraseblocks with low erase counter to free physical eraseblocks with high @@ -120,64 +140,9 @@ #define WL_MAX_FAILURES 32 /** - * struct ubi_wl_prot_entry - PEB protection entry. - * @rb_pnum: link in the @wl->prot.pnum RB-tree - * @rb_aec: link in the @wl->prot.aec RB-tree - * @abs_ec: the absolute erase counter value when the protection ends - * @e: the wear-leveling entry of the physical eraseblock under protection - * - * When the WL sub-system returns a physical eraseblock, the physical - * eraseblock is protected from being moved for some "time". For this reason, - * the physical eraseblock is not directly moved from the @wl->free tree to the - * @wl->used tree. There is one more tree in between where this physical - * eraseblock is temporarily stored (@wl->prot). - * - * All this protection stuff is needed because: - * o we don't want to move physical eraseblocks just after we have given them - * to the user; instead, we first want to let users fill them up with data; - * - * o there is a chance that the user will put the physical eraseblock very - * soon, so it makes sense not to move it for some time, but wait; this is - * especially important in case of "short term" physical eraseblocks. - * - * Physical eraseblocks stay protected only for limited time. But the "time" is - * measured in erase cycles in this case. This is implemented with help of the - * absolute erase counter (@wl->abs_ec). When it reaches certain value, the - * physical eraseblocks are moved from the protection trees (@wl->prot.*) to - * the @wl->used tree. - * - * Protected physical eraseblocks are searched by physical eraseblock number - * (when they are put) and by the absolute erase counter (to check if it is - * time to move them to the @wl->used tree). So there are actually 2 RB-trees - * storing the protected physical eraseblocks: @wl->prot.pnum and - * @wl->prot.aec. They are referred to as the "protection" trees. The - * first one is indexed by the physical eraseblock number. The second one is - * indexed by the absolute erase counter. Both trees store - * &struct ubi_wl_prot_entry objects. - * - * Each physical eraseblock has 2 main states: free and used. The former state - * corresponds to the @wl->free tree. The latter state is split up on several - * sub-states: - * o the WL movement is allowed (@wl->used tree); - * o the WL movement is temporarily prohibited (@wl->prot.pnum and - * @wl->prot.aec trees); - * o scrubbing is needed (@wl->scrub tree). - * - * Depending on the sub-state, wear-leveling entries of the used physical - * eraseblocks may be kept in one of those trees. - */ -struct ubi_wl_prot_entry { - struct rb_node rb_pnum; - struct rb_node rb_aec; - unsigned long long abs_ec; - struct ubi_wl_entry *e; -}; - -/** * struct ubi_work - UBI work description data structure. * @list: a link in the list of pending works * @func: worker function - * @priv: private data of the worker function * @e: physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured * @@ -198,9 +163,11 @@ struct ubi_work { static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root); +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); #else #define paranoid_check_ec(ubi, pnum, ec) 0 #define paranoid_check_in_wl_tree(e, root) +#define paranoid_check_in_pq(ubi, e) 0 #endif /** @@ -220,7 +187,7 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) struct ubi_wl_entry *e1; parent = *p; - e1 = rb_entry(parent, struct ubi_wl_entry, rb); + e1 = rb_entry(parent, struct ubi_wl_entry, u.rb); if (e->ec < e1->ec) p = &(*p)->rb_left; @@ -235,8 +202,8 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) } } - rb_link_node(&e->rb, parent, p); - rb_insert_color(&e->rb, root); + rb_link_node(&e->u.rb, parent, p); + rb_insert_color(&e->u.rb, root); } /** @@ -331,7 +298,7 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) while (p) { struct ubi_wl_entry *e1; - e1 = rb_entry(p, struct ubi_wl_entry, rb); + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); if (e->pnum == e1->pnum) { ubi_assert(e == e1); @@ -355,50 +322,24 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) } /** - * prot_tree_add - add physical eraseblock to protection trees. + * prot_queue_add - add physical eraseblock to the protection queue. * @ubi: UBI device description object * @e: the physical eraseblock to add - * @pe: protection entry object to use - * @abs_ec: absolute erase counter value when this physical eraseblock has - * to be removed from the protection trees. * - * @wl->lock has to be locked. + * This function adds @e to the tail of the protection queue @ubi->pq, where + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to + * be locked. */ -static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, - struct ubi_wl_prot_entry *pe, int abs_ec) +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) { - struct rb_node **p, *parent = NULL; - struct ubi_wl_prot_entry *pe1; - - pe->e = e; - pe->abs_ec = ubi->abs_ec + abs_ec; - - p = &ubi->prot.pnum.rb_node; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); - - if (e->pnum < pe1->e->pnum) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_pnum, parent, p); - rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); - - p = &ubi->prot.aec.rb_node; - parent = NULL; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); + int pq_tail = ubi->pq_head - 1; - if (pe->abs_ec < pe1->abs_ec) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_aec, parent, p); - rb_insert_color(&pe->rb_aec, &ubi->prot.aec); + if (pq_tail < 0) + pq_tail = UBI_PROT_QUEUE_LEN - 1; + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); + list_add_tail(&e->u.list, &ubi->pq[pq_tail]); + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); } /** @@ -414,14 +355,14 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) struct rb_node *p; struct ubi_wl_entry *e; - e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); max += e->ec; p = root->rb_node; while (p) { struct ubi_wl_entry *e1; - e1 = rb_entry(p, struct ubi_wl_entry, rb); + e1 = rb_entry(p, struct ubi_wl_entry, u.rb); if (e1->ec >= max) p = p->rb_left; else { @@ -443,17 +384,12 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) */ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) { - int err, protect, medium_ec; + int err, medium_ec; struct ubi_wl_entry *e, *first, *last; - struct ubi_wl_prot_entry *pe; ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || dtype == UBI_UNKNOWN); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) - return -ENOMEM; - retry: spin_lock(&ubi->wl_lock); if (!ubi->free.rb_node) { @@ -461,16 +397,13 @@ retry: ubi_assert(list_empty(&ubi->works)); ubi_err("no free eraseblocks"); spin_unlock(&ubi->wl_lock); - kfree(pe); return -ENOSPC; } spin_unlock(&ubi->wl_lock); err = produce_free_peb(ubi); - if (err < 0) { - kfree(pe); + if (err < 0) return err; - } goto retry; } @@ -483,7 +416,6 @@ retry: * %WL_FREE_MAX_DIFF. */ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - protect = LT_PROTECTION; break; case UBI_UNKNOWN: /* @@ -492,81 +424,63 @@ retry: * eraseblock with erase counter greater or equivalent than the * lowest erase counter plus %WL_FREE_MAX_DIFF. */ - first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); - last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb); + first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, + u.rb); + last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb); if (last->ec - first->ec < WL_FREE_MAX_DIFF) e = rb_entry(ubi->free.rb_node, - struct ubi_wl_entry, rb); + struct ubi_wl_entry, u.rb); else { medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; e = find_wl_entry(&ubi->free, medium_ec); } - protect = U_PROTECTION; break; case UBI_SHORTTERM: /* * For short term data we pick a physical eraseblock with the * lowest erase counter as we expect it will be erased soon. */ - e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); - protect = ST_PROTECTION; + e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); break; default: - protect = 0; - e = NULL; BUG(); } + paranoid_check_in_wl_tree(e, &ubi->free); + /* - * Move the physical eraseblock to the protection trees where it will + * Move the physical eraseblock to the protection queue where it will * be protected from being moved for some time. */ - paranoid_check_in_wl_tree(e, &ubi->free); - rb_erase(&e->rb, &ubi->free); - prot_tree_add(ubi, e, pe, protect); - - dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); + rb_erase(&e->u.rb, &ubi->free); + dbg_wl("PEB %d EC %d", e->pnum, e->ec); + prot_queue_add(ubi, e); spin_unlock(&ubi->wl_lock); - return e->pnum; } /** - * prot_tree_del - remove a physical eraseblock from the protection trees + * prot_queue_del - remove a physical eraseblock from the protection queue. * @ubi: UBI device description object * @pnum: the physical eraseblock to remove * - * This function returns PEB @pnum from the protection trees and returns zero - * in case of success and %-ENODEV if the PEB was not found in the protection - * trees. + * This function deletes PEB @pnum from the protection queue and returns zero + * in case of success and %-ENODEV if the PEB was not found. */ -static int prot_tree_del(struct ubi_device *ubi, int pnum) +static int prot_queue_del(struct ubi_device *ubi, int pnum) { - struct rb_node *p; - struct ubi_wl_prot_entry *pe = NULL; - - p = ubi->prot.pnum.rb_node; - while (p) { - - pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); - - if (pnum == pe->e->pnum) - goto found; + struct ubi_wl_entry *e; - if (pnum < pe->e->pnum) - p = p->rb_left; - else - p = p->rb_right; - } + e = ubi->lookuptbl[pnum]; + if (!e) + return -ENODEV; - return -ENODEV; + if (paranoid_check_in_pq(ubi, e)) + return -ENODEV; -found: - ubi_assert(pe->e->pnum == pnum); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - kfree(pe); + list_del(&e->u.list); + dbg_wl("deleted PEB %d from the protection queue", e->pnum); return 0; } @@ -632,47 +546,47 @@ out_free: } /** - * check_protection_over - check if it is time to stop protecting some PEBs. + * serve_prot_queue - check if it is time to stop protecting PEBs. * @ubi: UBI device description object * - * This function is called after each erase operation, when the absolute erase - * counter is incremented, to check if some physical eraseblock have not to be - * protected any longer. These physical eraseblocks are moved from the - * protection trees to the used tree. + * This function is called after each erase operation and removes PEBs from the + * tail of the protection queue. These PEBs have been protected for long enough + * and should be moved to the used tree. */ -static void check_protection_over(struct ubi_device *ubi) +static void serve_prot_queue(struct ubi_device *ubi) { - struct ubi_wl_prot_entry *pe; + struct ubi_wl_entry *e, *tmp; + int count; /* * There may be several protected physical eraseblock to remove, * process them all. */ - while (1) { - spin_lock(&ubi->wl_lock); - if (!ubi->prot.aec.rb_node) { - spin_unlock(&ubi->wl_lock); - break; - } - - pe = rb_entry(rb_first(&ubi->prot.aec), - struct ubi_wl_prot_entry, rb_aec); +repeat: + count = 0; + spin_lock(&ubi->wl_lock); + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { + dbg_wl("PEB %d EC %d protection over, move to used tree", + e->pnum, e->ec); - if (pe->abs_ec > ubi->abs_ec) { + list_del(&e->u.list); + wl_tree_add(e, &ubi->used); + if (count++ > 32) { + /* + * Let's be nice and avoid holding the spinlock for + * too long. + */ spin_unlock(&ubi->wl_lock); - break; + cond_resched(); + goto repeat; } - - dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", - pe->e->pnum, ubi->abs_ec, pe->abs_ec); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - wl_tree_add(pe->e, &ubi->used); - spin_unlock(&ubi->wl_lock); - - kfree(pe); - cond_resched(); } + + ubi->pq_head += 1; + if (ubi->pq_head == UBI_PROT_QUEUE_LEN) + ubi->pq_head = 0; + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); + spin_unlock(&ubi->wl_lock); } /** @@ -680,8 +594,8 @@ static void check_protection_over(struct ubi_device *ubi) * @ubi: UBI device description object * @wrk: the work to schedule * - * This function enqueues a work defined by @wrk to the tail of the pending - * works list. + * This function adds a work defined by @wrk to the tail of the pending works + * list. */ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) { @@ -739,13 +653,11 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { - int err, put = 0, scrubbing = 0, protect = 0; - struct ubi_wl_prot_entry *uninitialized_var(pe); + int err, scrubbing = 0, torture = 0; struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; kfree(wrk); - if (cancel) return 0; @@ -781,7 +693,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, * highly worn-out free physical eraseblock. If the erase * counters differ much enough, start wear-leveling. */ - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { @@ -790,21 +702,21 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, goto out_cancel; } paranoid_check_in_wl_tree(e1, &ubi->used); - rb_erase(&e1->rb, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); } else { /* Perform scrubbing */ scrubbing = 1; - e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); paranoid_check_in_wl_tree(e1, &ubi->scrub); - rb_erase(&e1->rb, &ubi->scrub); + rb_erase(&e1->u.rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } paranoid_check_in_wl_tree(e2, &ubi->free); - rb_erase(&e2->rb, &ubi->free); + rb_erase(&e2->u.rb, &ubi->free); ubi->move_from = e1; ubi->move_to = e2; spin_unlock(&ubi->wl_lock); @@ -844,46 +756,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); if (err) { - + if (err == -EAGAIN) + goto out_not_moved; if (err < 0) goto out_error; - if (err == 1) + if (err == 2) { + /* Target PEB write error, torture it */ + torture = 1; goto out_not_moved; + } /* - * For some reason the LEB was not moved - it might be because - * the volume is being deleted. We should prevent this PEB from - * being selected for wear-levelling movement for some "time", - * so put it to the protection tree. + * The LEB has not been moved because the volume is being + * deleted or the PEB has been put meanwhile. We should prevent + * this PEB from being selected for wear-leveling movement + * again, so put it to the protection queue. */ - dbg_wl("cancelled moving PEB %d", e1->pnum); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) { - err = -ENOMEM; - goto out_error; - } + dbg_wl("canceled moving PEB %d", e1->pnum); + ubi_assert(err == 1); + + ubi_free_vid_hdr(ubi, vid_hdr); + vid_hdr = NULL; + + spin_lock(&ubi->wl_lock); + prot_queue_add(ubi, e1); + ubi_assert(!ubi->move_to_put); + ubi->move_from = ubi->move_to = NULL; + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); - protect = 1; + e1 = NULL; + err = schedule_erase(ubi, e2, 0); + if (err) + goto out_error; + mutex_unlock(&ubi->move_mutex); + return 0; } + /* The PEB has been successfully moved */ ubi_free_vid_hdr(ubi, vid_hdr); - if (scrubbing && !protect) + vid_hdr = NULL; + if (scrubbing) ubi_msg("scrubbed PEB %d, data moved to PEB %d", e1->pnum, e2->pnum); spin_lock(&ubi->wl_lock); - if (protect) - prot_tree_add(ubi, e1, pe, protect); - if (!ubi->move_to_put) + if (!ubi->move_to_put) { wl_tree_add(e2, &ubi->used); - else - put = 1; + e2 = NULL; + } ubi->move_from = ubi->move_to = NULL; ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - if (put) { + err = schedule_erase(ubi, e1, 0); + if (err) { + e1 = NULL; + goto out_error; + } + + if (e2) { /* * Well, the target PEB was put meanwhile, schedule it for * erasure. @@ -894,13 +827,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, goto out_error; } - if (!protect) { - err = schedule_erase(ubi, e1, 0); - if (err) - goto out_error; - } - - dbg_wl("done"); mutex_unlock(&ubi->move_mutex); return 0; @@ -908,20 +834,24 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, /* * For some reasons the LEB was not moved, might be an error, might be * something else. @e1 was not changed, so return it back. @e2 might - * be changed, schedule it for erasure. + * have been changed, schedule it for erasure. */ out_not_moved: + dbg_wl("canceled moving PEB %d", e1->pnum); ubi_free_vid_hdr(ubi, vid_hdr); + vid_hdr = NULL; spin_lock(&ubi->wl_lock); if (scrubbing) wl_tree_add(e1, &ubi->scrub); else wl_tree_add(e1, &ubi->used); + ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; - ubi->move_to_put = ubi->wl_scheduled = 0; + ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - err = schedule_erase(ubi, e2, 0); + e1 = NULL; + err = schedule_erase(ubi, e2, torture); if (err) goto out_error; @@ -938,8 +868,10 @@ out_error: ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - kmem_cache_free(ubi_wl_entry_slab, e1); - kmem_cache_free(ubi_wl_entry_slab, e2); + if (e1) + kmem_cache_free(ubi_wl_entry_slab, e1); + if (e2) + kmem_cache_free(ubi_wl_entry_slab, e2); ubi_ro_mode(ubi); mutex_unlock(&ubi->move_mutex); @@ -988,7 +920,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi) * erase counter of free physical eraseblocks is greater then * %UBI_WL_THRESHOLD. */ - e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) @@ -1050,7 +982,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, kfree(wl_wrk); spin_lock(&ubi->wl_lock); - ubi->abs_ec += 1; wl_tree_add(e, &ubi->free); spin_unlock(&ubi->wl_lock); @@ -1058,7 +989,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, * One more erase operation has happened, take care about * protected physical eraseblocks. */ - check_protection_over(ubi); + serve_prot_queue(ubi); /* And take care about wear-leveling */ err = ensure_wear_leveling(ubi); @@ -1190,12 +1121,12 @@ retry: } else { if (in_wl_tree(e, &ubi->used)) { paranoid_check_in_wl_tree(e, &ubi->used); - rb_erase(&e->rb, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); } else if (in_wl_tree(e, &ubi->scrub)) { paranoid_check_in_wl_tree(e, &ubi->scrub); - rb_erase(&e->rb, &ubi->scrub); + rb_erase(&e->u.rb, &ubi->scrub); } else { - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1255,11 +1186,11 @@ retry: if (in_wl_tree(e, &ubi->used)) { paranoid_check_in_wl_tree(e, &ubi->used); - rb_erase(&e->rb, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); } else { int err; - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1290,7 +1221,7 @@ int ubi_wl_flush(struct ubi_device *ubi) int err; /* - * Erase while the pending works queue is not empty, but not more then + * Erase while the pending works queue is not empty, but not more than * the number of currently pending works. */ dbg_wl("flush (%d pending works)", ubi->works_count); @@ -1308,7 +1239,7 @@ int ubi_wl_flush(struct ubi_device *ubi) up_write(&ubi->work_sem); /* - * And in case last was the WL worker and it cancelled the LEB + * And in case last was the WL worker and it canceled the LEB * movement, flush again. */ while (ubi->works_count) { @@ -1337,11 +1268,11 @@ static void tree_destroy(struct rb_root *root) else if (rb->rb_right) rb = rb->rb_right; else { - e = rb_entry(rb, struct ubi_wl_entry, rb); + e = rb_entry(rb, struct ubi_wl_entry, u.rb); rb = rb_parent(rb); if (rb) { - if (rb->rb_left == &e->rb) + if (rb->rb_left == &e->u.rb) rb->rb_left = NULL; else rb->rb_right = NULL; @@ -1436,15 +1367,13 @@ static void cancel_pending(struct ubi_device *ubi) */ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) { - int err; + int err, i; struct rb_node *rb1, *rb2; struct ubi_scan_volume *sv; struct ubi_scan_leb *seb, *tmp; struct ubi_wl_entry *e; - ubi->used = ubi->free = ubi->scrub = RB_ROOT; - ubi->prot.pnum = ubi->prot.aec = RB_ROOT; spin_lock_init(&ubi->wl_lock); mutex_init(&ubi->move_mutex); init_rwsem(&ubi->work_sem); @@ -1458,6 +1387,10 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (!ubi->lookuptbl) return err; + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) + INIT_LIST_HEAD(&ubi->pq[i]); + ubi->pq_head = 0; + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { cond_resched(); @@ -1552,33 +1485,18 @@ out_free: } /** - * protection_trees_destroy - destroy the protection RB-trees. + * protection_queue_destroy - destroy the protection queue. * @ubi: UBI device description object */ -static void protection_trees_destroy(struct ubi_device *ubi) +static void protection_queue_destroy(struct ubi_device *ubi) { - struct rb_node *rb; - struct ubi_wl_prot_entry *pe; - - rb = ubi->prot.aec.rb_node; - while (rb) { - if (rb->rb_left) - rb = rb->rb_left; - else if (rb->rb_right) - rb = rb->rb_right; - else { - pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); - - rb = rb_parent(rb); - if (rb) { - if (rb->rb_left == &pe->rb_aec) - rb->rb_left = NULL; - else - rb->rb_right = NULL; - } + int i; + struct ubi_wl_entry *e, *tmp; - kmem_cache_free(ubi_wl_entry_slab, pe->e); - kfree(pe); + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { + list_del(&e->u.list); + kmem_cache_free(ubi_wl_entry_slab, e); } } } @@ -1591,7 +1509,7 @@ void ubi_wl_close(struct ubi_device *ubi) { dbg_wl("close the WL sub-system"); cancel_pending(ubi); - protection_trees_destroy(ubi); + protection_queue_destroy(ubi); tree_destroy(&ubi->used); tree_destroy(&ubi->free); tree_destroy(&ubi->scrub); @@ -1661,4 +1579,27 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, return 1; } +/** + * paranoid_check_in_pq - check if wear-leveling entry is in the protection + * queue. + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * + * This function returns zero if @e is in @ubi->pq and %1 if it is not. + */ +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + struct ubi_wl_entry *p; + int i; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", + e->pnum, e->ec); + ubi_dbg_dump_stack(); + return 1; +} #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 9f7896a25f1b..c4918b86ed19 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -3,6 +3,8 @@ * Driver for Option High Speed Mobile Devices. * * Copyright (C) 2008 Option International + * Filip Aben <f.aben@option.com> + * Denis Joseph Barrow <d.barow@option.com> * Copyright (C) 2007 Andrew Bird (Sphere Systems Ltd) * <ajb@spheresystems.co.uk> * Copyright (C) 2008 Greg Kroah-Hartman <gregkh@suse.de> @@ -39,8 +41,11 @@ * port is opened, as this have a huge impact on the network port * throughput. * - * Interface 2: Standard modem interface - circuit switched interface, should - * not be used. + * Interface 2: Standard modem interface - circuit switched interface, this + * can be used to make a standard ppp connection however it + * should not be used in conjunction with the IP network interface + * enabled for USB performance reasons i.e. if using this set + * ideally disable_net=1. * *****************************************************************************/ @@ -63,6 +68,8 @@ #include <linux/usb/cdc.h> #include <net/arp.h> #include <asm/byteorder.h> +#include <linux/serial_core.h> +#include <linux/serial.h> #define DRIVER_VERSION "1.2" @@ -182,6 +189,41 @@ enum rx_ctrl_state{ RX_PENDING }; +#define BM_REQUEST_TYPE (0xa1) +#define B_NOTIFICATION (0x20) +#define W_VALUE (0x0) +#define W_INDEX (0x2) +#define W_LENGTH (0x2) + +#define B_OVERRUN (0x1<<6) +#define B_PARITY (0x1<<5) +#define B_FRAMING (0x1<<4) +#define B_RING_SIGNAL (0x1<<3) +#define B_BREAK (0x1<<2) +#define B_TX_CARRIER (0x1<<1) +#define B_RX_CARRIER (0x1<<0) + +struct hso_serial_state_notification { + u8 bmRequestType; + u8 bNotification; + u16 wValue; + u16 wIndex; + u16 wLength; + u16 UART_state_bitmap; +} __attribute__((packed)); + +struct hso_tiocmget { + struct mutex mutex; + wait_queue_head_t waitq; + int intr_completed; + struct usb_endpoint_descriptor *endp; + struct urb *urb; + struct hso_serial_state_notification serial_state_notification; + u16 prev_UART_state_bitmap; + struct uart_icount icount; +}; + + struct hso_serial { struct hso_device *parent; int magic; @@ -219,6 +261,7 @@ struct hso_serial { spinlock_t serial_lock; int (*write_data) (struct hso_serial *serial); + struct hso_tiocmget *tiocmget; /* Hacks required to get flow control * working on the serial receive buffers * so as not to drop characters on the floor. @@ -305,7 +348,7 @@ static void async_get_intf(struct work_struct *data); static void async_put_intf(struct work_struct *data); static int hso_put_activity(struct hso_device *hso_dev); static int hso_get_activity(struct hso_device *hso_dev); - +static void tiocmget_intr_callback(struct urb *urb); /*****************************************************************************/ /* Helping functions */ /*****************************************************************************/ @@ -362,8 +405,6 @@ static struct tty_driver *tty_drv; static struct hso_device *serial_table[HSO_SERIAL_TTY_MINORS]; static struct hso_device *network_table[HSO_MAX_NET_DEVICES]; static spinlock_t serial_table_lock; -static struct ktermios *hso_serial_termios[HSO_SERIAL_TTY_MINORS]; -static struct ktermios *hso_serial_termios_locked[HSO_SERIAL_TTY_MINORS]; static const s32 default_port_spec[] = { HSO_INTF_MUX | HSO_PORT_NETWORK, @@ -1009,23 +1050,11 @@ static void read_bulk_callback(struct urb *urb) /* Serial driver functions */ -static void _hso_serial_set_termios(struct tty_struct *tty, - struct ktermios *old) +static void hso_init_termios(struct ktermios *termios) { - struct hso_serial *serial = get_serial_by_tty(tty); - struct ktermios *termios; - - if ((!tty) || (!tty->termios) || (!serial)) { - printk(KERN_ERR "%s: no tty structures", __func__); - return; - } - - D4("port %d", serial->minor); - /* * The default requirements for this device are: */ - termios = tty->termios; termios->c_iflag &= ~(IGNBRK /* disable ignore break */ | BRKINT /* disable break causes interrupt */ @@ -1057,15 +1086,38 @@ static void _hso_serial_set_termios(struct tty_struct *tty, termios->c_cflag |= CS8; /* character size 8 bits */ /* baud rate 115200 */ - tty_encode_baud_rate(serial->tty, 115200, 115200); + tty_termios_encode_baud_rate(termios, 115200, 115200); +} + +static void _hso_serial_set_termios(struct tty_struct *tty, + struct ktermios *old) +{ + struct hso_serial *serial = get_serial_by_tty(tty); + struct ktermios *termios; + + if (!serial) { + printk(KERN_ERR "%s: no tty structures", __func__); + return; + } + + D4("port %d", serial->minor); /* - * Force low_latency on; otherwise the pushes are scheduled; - * this is bad as it opens up the possibility of dropping bytes - * on the floor. We don't want to drop bytes on the floor. :) + * Fix up unsupported bits */ - serial->tty->low_latency = 1; - return; + termios = tty->termios; + termios->c_iflag &= ~IXON; /* disable enable XON/XOFF flow control */ + + termios->c_cflag &= + ~(CSIZE /* no size */ + | PARENB /* disable parity bit */ + | CBAUD /* clear current baud rate */ + | CBAUDEX); /* clear current buad rate */ + + termios->c_cflag |= CS8; /* character size 8 bits */ + + /* baud rate 115200 */ + tty_encode_baud_rate(tty, 115200, 115200); } static void hso_resubmit_rx_bulk_urb(struct hso_serial *serial, struct urb *urb) @@ -1228,6 +1280,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) /* sanity check */ if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) { + WARN_ON(1); tty->driver_data = NULL; D1("Failed to open port"); return -ENODEV; @@ -1242,8 +1295,10 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) kref_get(&serial->parent->ref); /* setup */ + spin_lock_irq(&serial->serial_lock); tty->driver_data = serial; - serial->tty = tty; + serial->tty = tty_kref_get(tty); + spin_unlock_irq(&serial->serial_lock); /* check for port already opened, if not set the termios */ serial->open_count++; @@ -1285,6 +1340,10 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) D1("Closing serial port"); + /* Open failed, no close cleanup required */ + if (serial == NULL) + return; + mutex_lock(&serial->parent->mutex); usb_gone = serial->parent->usb_gone; @@ -1297,10 +1356,13 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) kref_put(&serial->parent->ref, hso_serial_ref_free); if (serial->open_count <= 0) { serial->open_count = 0; - if (serial->tty) { + spin_lock_irq(&serial->serial_lock); + if (serial->tty == tty) { serial->tty->driver_data = NULL; serial->tty = NULL; + tty_kref_put(tty); } + spin_unlock_irq(&serial->serial_lock); if (!usb_gone) hso_stop_serial_device(serial->parent); tasklet_kill(&serial->unthrottle_tasklet); @@ -1400,25 +1462,217 @@ static int hso_serial_chars_in_buffer(struct tty_struct *tty) return chars; } +int tiocmget_submit_urb(struct hso_serial *serial, + struct hso_tiocmget *tiocmget, + struct usb_device *usb) +{ + int result; + + if (serial->parent->usb_gone) + return -ENODEV; + usb_fill_int_urb(tiocmget->urb, usb, + usb_rcvintpipe(usb, + tiocmget->endp-> + bEndpointAddress & 0x7F), + &tiocmget->serial_state_notification, + sizeof(struct hso_serial_state_notification), + tiocmget_intr_callback, serial, + tiocmget->endp->bInterval); + result = usb_submit_urb(tiocmget->urb, GFP_ATOMIC); + if (result) { + dev_warn(&usb->dev, "%s usb_submit_urb failed %d\n", __func__, + result); + } + return result; + +} + +static void tiocmget_intr_callback(struct urb *urb) +{ + struct hso_serial *serial = urb->context; + struct hso_tiocmget *tiocmget; + int status = urb->status; + u16 UART_state_bitmap, prev_UART_state_bitmap; + struct uart_icount *icount; + struct hso_serial_state_notification *serial_state_notification; + struct usb_device *usb; + + /* Sanity checks */ + if (!serial) + return; + if (status) { + log_usb_status(status, __func__); + return; + } + tiocmget = serial->tiocmget; + if (!tiocmget) + return; + usb = serial->parent->usb; + serial_state_notification = &tiocmget->serial_state_notification; + if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE || + serial_state_notification->bNotification != B_NOTIFICATION || + le16_to_cpu(serial_state_notification->wValue) != W_VALUE || + le16_to_cpu(serial_state_notification->wIndex) != W_INDEX || + le16_to_cpu(serial_state_notification->wLength) != W_LENGTH) { + dev_warn(&usb->dev, + "hso received invalid serial state notification\n"); + DUMP(serial_state_notification, + sizeof(hso_serial_state_notifation)) + } else { + + UART_state_bitmap = le16_to_cpu(serial_state_notification-> + UART_state_bitmap); + prev_UART_state_bitmap = tiocmget->prev_UART_state_bitmap; + icount = &tiocmget->icount; + spin_lock(&serial->serial_lock); + if ((UART_state_bitmap & B_OVERRUN) != + (prev_UART_state_bitmap & B_OVERRUN)) + icount->parity++; + if ((UART_state_bitmap & B_PARITY) != + (prev_UART_state_bitmap & B_PARITY)) + icount->parity++; + if ((UART_state_bitmap & B_FRAMING) != + (prev_UART_state_bitmap & B_FRAMING)) + icount->frame++; + if ((UART_state_bitmap & B_RING_SIGNAL) && + !(prev_UART_state_bitmap & B_RING_SIGNAL)) + icount->rng++; + if ((UART_state_bitmap & B_BREAK) != + (prev_UART_state_bitmap & B_BREAK)) + icount->brk++; + if ((UART_state_bitmap & B_TX_CARRIER) != + (prev_UART_state_bitmap & B_TX_CARRIER)) + icount->dsr++; + if ((UART_state_bitmap & B_RX_CARRIER) != + (prev_UART_state_bitmap & B_RX_CARRIER)) + icount->dcd++; + tiocmget->prev_UART_state_bitmap = UART_state_bitmap; + spin_unlock(&serial->serial_lock); + tiocmget->intr_completed = 1; + wake_up_interruptible(&tiocmget->waitq); + } + memset(serial_state_notification, 0, + sizeof(struct hso_serial_state_notification)); + tiocmget_submit_urb(serial, + tiocmget, + serial->parent->usb); +} + +/* + * next few functions largely stolen from drivers/serial/serial_core.c + */ +/* Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change + * - mask passed in arg for lines of interest + * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) + * Caller should use TIOCGICOUNT to see which one it was + */ +static int +hso_wait_modem_status(struct hso_serial *serial, unsigned long arg) +{ + DECLARE_WAITQUEUE(wait, current); + struct uart_icount cprev, cnow; + struct hso_tiocmget *tiocmget; + int ret; + + tiocmget = serial->tiocmget; + if (!tiocmget) + return -ENOENT; + /* + * note the counters on entry + */ + spin_lock_irq(&serial->serial_lock); + memcpy(&cprev, &tiocmget->icount, sizeof(struct uart_icount)); + spin_unlock_irq(&serial->serial_lock); + add_wait_queue(&tiocmget->waitq, &wait); + for (;;) { + spin_lock_irq(&serial->serial_lock); + memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount)); + spin_unlock_irq(&serial->serial_lock); + set_current_state(TASK_INTERRUPTIBLE); + if (((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) || + ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) || + ((arg & TIOCM_CD) && (cnow.dcd != cprev.dcd))) { + ret = 0; + break; + } + schedule(); + /* see if a signal did it */ + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + cprev = cnow; + } + current->state = TASK_RUNNING; + remove_wait_queue(&tiocmget->waitq, &wait); + + return ret; +} + +/* + * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) + * Return: write counters to the user passed counter struct + * NB: both 1->0 and 0->1 transitions are counted except for + * RI where only 0->1 is counted. + */ +static int hso_get_count(struct hso_serial *serial, + struct serial_icounter_struct __user *icnt) +{ + struct serial_icounter_struct icount; + struct uart_icount cnow; + struct hso_tiocmget *tiocmget = serial->tiocmget; + + if (!tiocmget) + return -ENOENT; + spin_lock_irq(&serial->serial_lock); + memcpy(&cnow, &tiocmget->icount, sizeof(struct uart_icount)); + spin_unlock_irq(&serial->serial_lock); + + icount.cts = cnow.cts; + icount.dsr = cnow.dsr; + icount.rng = cnow.rng; + icount.dcd = cnow.dcd; + icount.rx = cnow.rx; + icount.tx = cnow.tx; + icount.frame = cnow.frame; + icount.overrun = cnow.overrun; + icount.parity = cnow.parity; + icount.brk = cnow.brk; + icount.buf_overrun = cnow.buf_overrun; + + return copy_to_user(icnt, &icount, sizeof(icount)) ? -EFAULT : 0; +} + static int hso_serial_tiocmget(struct tty_struct *tty, struct file *file) { - unsigned int value; + int retval; struct hso_serial *serial = get_serial_by_tty(tty); - unsigned long flags; + struct hso_tiocmget *tiocmget; + u16 UART_state_bitmap; /* sanity check */ if (!serial) { D1("no tty structures"); return -EINVAL; } - - spin_lock_irqsave(&serial->serial_lock, flags); - value = ((serial->rts_state) ? TIOCM_RTS : 0) | + spin_lock_irq(&serial->serial_lock); + retval = ((serial->rts_state) ? TIOCM_RTS : 0) | ((serial->dtr_state) ? TIOCM_DTR : 0); - spin_unlock_irqrestore(&serial->serial_lock, flags); + tiocmget = serial->tiocmget; + if (tiocmget) { - return value; + UART_state_bitmap = le16_to_cpu( + tiocmget->prev_UART_state_bitmap); + if (UART_state_bitmap & B_RING_SIGNAL) + retval |= TIOCM_RNG; + if (UART_state_bitmap & B_RX_CARRIER) + retval |= TIOCM_CD; + if (UART_state_bitmap & B_TX_CARRIER) + retval |= TIOCM_DSR; + } + spin_unlock_irq(&serial->serial_lock); + return retval; } static int hso_serial_tiocmset(struct tty_struct *tty, struct file *file, @@ -1460,6 +1714,32 @@ static int hso_serial_tiocmset(struct tty_struct *tty, struct file *file, USB_CTRL_SET_TIMEOUT); } +static int hso_serial_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct hso_serial *serial = get_serial_by_tty(tty); + void __user *uarg = (void __user *)arg; + int ret = 0; + D4("IOCTL cmd: %d, arg: %ld", cmd, arg); + + if (!serial) + return -ENODEV; + switch (cmd) { + case TIOCMIWAIT: + ret = hso_wait_modem_status(serial, arg); + break; + + case TIOCGICOUNT: + ret = hso_get_count(serial, uarg); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + + /* starts a transmit */ static void hso_kick_transmit(struct hso_serial *serial) { @@ -1653,6 +1933,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) { struct hso_serial *serial = urb->context; int status = urb->status; + struct tty_struct *tty; /* sanity check */ if (!serial) { @@ -1662,14 +1943,18 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) spin_lock(&serial->serial_lock); serial->tx_urb_used = 0; + tty = tty_kref_get(serial->tty); spin_unlock(&serial->serial_lock); if (status) { log_usb_status(status, __func__); + tty_kref_put(tty); return; } hso_put_activity(serial->parent); - if (serial->tty) - tty_wakeup(serial->tty); + if (tty) { + tty_wakeup(tty); + tty_kref_put(tty); + } hso_kick_transmit(serial); D1(" "); @@ -1706,6 +1991,7 @@ static void ctrl_callback(struct urb *urb) struct hso_serial *serial = urb->context; struct usb_ctrlrequest *req; int status = urb->status; + struct tty_struct *tty; /* sanity check */ if (!serial) @@ -1713,9 +1999,11 @@ static void ctrl_callback(struct urb *urb) spin_lock(&serial->serial_lock); serial->tx_urb_used = 0; + tty = tty_kref_get(serial->tty); spin_unlock(&serial->serial_lock); if (status) { log_usb_status(status, __func__); + tty_kref_put(tty); return; } @@ -1734,25 +2022,31 @@ static void ctrl_callback(struct urb *urb) spin_unlock(&serial->serial_lock); } else { hso_put_activity(serial->parent); - if (serial->tty) - tty_wakeup(serial->tty); + if (tty) + tty_wakeup(tty); /* response to a write command */ hso_kick_transmit(serial); } + tty_kref_put(tty); } /* handle RX data for serial port */ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) { - struct tty_struct *tty = serial->tty; + struct tty_struct *tty; int write_length_remaining = 0; int curr_write_len; + /* Sanity check */ if (urb == NULL || serial == NULL) { D1("serial = NULL"); return -2; } + spin_lock(&serial->serial_lock); + tty = tty_kref_get(serial->tty); + spin_unlock(&serial->serial_lock); + /* Push data to tty */ if (tty) { write_length_remaining = urb->actual_length - @@ -1774,6 +2068,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) serial->curr_rx_urb_offset = 0; serial->rx_urb_filled[hso_urb_to_index(serial, urb)] = 0; } + tty_kref_put(tty); return write_length_remaining; } @@ -1922,7 +2217,10 @@ static int hso_start_serial_device(struct hso_device *hso_dev, gfp_t flags) serial->shared_int->use_count++; mutex_unlock(&serial->shared_int->shared_int_lock); } - + if (serial->tiocmget) + tiocmget_submit_urb(serial, + serial->tiocmget, + serial->parent->usb); return result; } @@ -1930,6 +2228,7 @@ static int hso_stop_serial_device(struct hso_device *hso_dev) { int i; struct hso_serial *serial = dev2ser(hso_dev); + struct hso_tiocmget *tiocmget; if (!serial) return -ENODEV; @@ -1958,6 +2257,11 @@ static int hso_stop_serial_device(struct hso_device *hso_dev) } mutex_unlock(&serial->shared_int->shared_int_lock); } + tiocmget = serial->tiocmget; + if (tiocmget) { + wake_up_interruptible(&tiocmget->waitq); + usb_kill_urb(tiocmget->urb); + } return 0; } @@ -2304,6 +2608,20 @@ exit: return NULL; } +static void hso_free_tiomget(struct hso_serial *serial) +{ + struct hso_tiocmget *tiocmget = serial->tiocmget; + if (tiocmget) { + kfree(tiocmget); + if (tiocmget->urb) { + usb_free_urb(tiocmget->urb); + tiocmget->urb = NULL; + } + serial->tiocmget = NULL; + + } +} + /* Frees an AT channel ( goes for both mux and non-mux ) */ static void hso_free_serial_device(struct hso_device *hso_dev) { @@ -2322,6 +2640,7 @@ static void hso_free_serial_device(struct hso_device *hso_dev) else mutex_unlock(&serial->shared_int->shared_int_lock); } + hso_free_tiomget(serial); kfree(serial); hso_free_device(hso_dev); } @@ -2333,6 +2652,7 @@ static struct hso_device *hso_create_bulk_serial_device( struct hso_device *hso_dev; struct hso_serial *serial; int num_urbs; + struct hso_tiocmget *tiocmget; hso_dev = hso_create_device(interface, port); if (!hso_dev) @@ -2345,8 +2665,27 @@ static struct hso_device *hso_create_bulk_serial_device( serial->parent = hso_dev; hso_dev->port_data.dev_serial = serial; - if (port & HSO_PORT_MODEM) + if ((port & HSO_PORT_MASK) == HSO_PORT_MODEM) { num_urbs = 2; + serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget), + GFP_KERNEL); + /* it isn't going to break our heart if serial->tiocmget + * allocation fails don't bother checking this. + */ + if (serial->tiocmget) { + tiocmget = serial->tiocmget; + tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL); + if (tiocmget->urb) { + mutex_init(&tiocmget->mutex); + init_waitqueue_head(&tiocmget->waitq); + tiocmget->endp = hso_get_ep( + interface, + USB_ENDPOINT_XFER_INT, + USB_DIR_IN); + } else + hso_free_tiomget(serial); + } + } else num_urbs = 1; @@ -2382,6 +2721,7 @@ static struct hso_device *hso_create_bulk_serial_device( exit2: hso_serial_common_free(serial); exit: + hso_free_tiomget(serial); kfree(serial); hso_free_device(hso_dev); return NULL; @@ -2786,15 +3126,20 @@ static void hso_serial_ref_free(struct kref *ref) static void hso_free_interface(struct usb_interface *interface) { struct hso_serial *hso_dev; + struct tty_struct *tty; int i; for (i = 0; i < HSO_SERIAL_TTY_MINORS; i++) { if (serial_table[i] && (serial_table[i]->interface == interface)) { hso_dev = dev2ser(serial_table[i]); - if (hso_dev->tty) - tty_hangup(hso_dev->tty); + spin_lock_irq(&hso_dev->serial_lock); + tty = tty_kref_get(hso_dev->tty); + spin_unlock_irq(&hso_dev->serial_lock); + if (tty) + tty_hangup(tty); mutex_lock(&hso_dev->parent->mutex); + tty_kref_put(tty); hso_dev->parent->usb_gone = 1; mutex_unlock(&hso_dev->parent->mutex); kref_put(&serial_table[i]->ref, hso_serial_ref_free); @@ -2887,6 +3232,7 @@ static const struct tty_operations hso_serial_ops = { .close = hso_serial_close, .write = hso_serial_write, .write_room = hso_serial_write_room, + .ioctl = hso_serial_ioctl, .set_termios = hso_serial_set_termios, .chars_in_buffer = hso_serial_chars_in_buffer, .tiocmget = hso_serial_tiocmget, @@ -2939,9 +3285,7 @@ static int __init hso_init(void) tty_drv->subtype = SERIAL_TYPE_NORMAL; tty_drv->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; tty_drv->init_termios = tty_std_termios; - tty_drv->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; - tty_drv->termios = hso_serial_termios; - tty_drv->termios_locked = hso_serial_termios_locked; + hso_init_termios(&tty_drv->init_termios); tty_set_operations(tty_drv, &hso_serial_ops); /* register the tty driver */ diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 737bd9484822..65e8294a9e29 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -200,7 +200,7 @@ static inline unsigned long fast_get_dcookie(struct path *path) { unsigned long cookie; - if (path->dentry->d_cookie) + if (path->dentry->d_flags & DCACHE_COOKIE) return (unsigned long)path->dentry; get_dcookie(path, &cookie); return cookie; diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 7beffcab2745..9dedbbd218c3 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -704,16 +704,17 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, cpumask_t dest) +static void iosapic_set_affinity_irq(unsigned int irq, + const struct cpumask *dest) { struct vector_info *vi = iosapic_get_vector(irq); u32 d0, d1, dummy_d0; unsigned long flags; - if (cpu_check_affinity(irq, &dest)) + if (cpu_check_affinity(irq, dest)) return; - vi->txn_addr = txn_affinity_addr(irq, first_cpu(dest)); + vi->txn_addr = txn_affinity_addr(irq, cpumask_first(dest)); spin_lock_irqsave(&iosapic_lock, flags); /* d1 contains the destination CPU, so only want to set that diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 8514c3a1746a..c2e1bcbb28a7 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include <asm/pci_x86.h> /* Global variables */ diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 09021930589f..df146be9d2e9 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include <asm/pci_x86.h> u8 cpqhp_nic_irq; diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 633e743442ac..dd18f857dfb0 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include <linux/delay.h> #include <linux/wait.h> #include "../pci.h" -#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ +#include <asm/pci_x86.h> /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5d72866897a8..c88485860a0a 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask); + len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask); buf[len++] = '\n'; buf[len] = '\0'; return len; @@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask); + len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 003a9b3c293f..5b3f5937ecf5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev, cpumask = pcibus_to_cpumask(to_pci_bus(dev)); ret = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask): - cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); + cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) : + cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask); buf[ret++] = '\n'; buf[ret] = '\0'; return ret; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 8a8df7552969..06b71823f399 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -632,8 +632,8 @@ do_IRQ (struct pt_regs *regs) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 834e9ee7e934..92b0417f8e12 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -18,6 +18,7 @@ #include <asm/etr.h> #include <asm/lowcore.h> #include <asm/cio.h> +#include <asm/cpu.h> #include "s390mach.h" static struct semaphore m_sem; @@ -369,6 +370,8 @@ s390_do_machine_check(struct pt_regs *regs) lockdep_off(); + s390_idle_check(); + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); umode = user_mode(regs); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 152d4aa9354f..b7322976d2b7 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -21,7 +21,7 @@ config SCSI You also need to say Y here if you have a device which speaks the SCSI protocol. Examples of this include the parallel port version of the IOMEGA ZIP drive, USB storage devices, Fibre - Channel, FireWire storage and the IDE-SCSI emulation driver. + Channel, and FireWire storage. To compile this driver as a module, choose M here and read <file:Documentation/scsi/scsi.txt>. @@ -101,9 +101,9 @@ config CHR_DEV_OSST ---help--- The OnStream SC-x0 SCSI tape drives cannot be driven by the standard st driver, but instead need this special osst driver and - use the /dev/osstX char device nodes (major 206). Via usb-storage - and ide-scsi, you may be able to drive the USB-x0 and DI-x0 drives - as well. Note that there is also a second generation of OnStream + use the /dev/osstX char device nodes (major 206). Via usb-storage, + you may be able to drive the USB-x0 and DI-x0 drives as well. + Note that there is also a second generation of OnStream tape drives (ADR-x0) that supports the standard SCSI-2 commands for tapes (QIC-157) and can be driven by the standard driver st. For more information, you may have a look at the SCSI-HOWTO diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 1410697257cb..7461eb09a031 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -105,7 +105,6 @@ obj-$(CONFIG_SCSI_GDTH) += gdth.o obj-$(CONFIG_SCSI_INITIO) += initio.o obj-$(CONFIG_SCSI_INIA100) += a100u2w.o obj-$(CONFIG_SCSI_QLOGICPTI) += qlogicpti.o -obj-$(CONFIG_BLK_DEV_IDESCSI) += ide-scsi.o obj-$(CONFIG_SCSI_MESH) += mesh.o obj-$(CONFIG_SCSI_MAC53C94) += mac53c94.o obj-$(CONFIG_BLK_DEV_3W_XXXX_RAID) += 3w-xxxx.o diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c deleted file mode 100644 index c24140aff8e7..000000000000 --- a/drivers/scsi/ide-scsi.c +++ /dev/null @@ -1,840 +0,0 @@ -/* - * Copyright (C) 1996-1999 Gadi Oxman <gadio@netvision.net.il> - * Copyright (C) 2004-2005 Bartlomiej Zolnierkiewicz - */ - -/* - * Emulation of a SCSI host adapter for IDE ATAPI devices. - * - * With this driver, one can use the Linux SCSI drivers instead of the - * native IDE ATAPI drivers. - * - * Ver 0.1 Dec 3 96 Initial version. - * Ver 0.2 Jan 26 97 Fixed bug in cleanup_module() and added emulation - * of MODE_SENSE_6/MODE_SELECT_6 for cdroms. Thanks - * to Janos Farkas for pointing this out. - * Avoid using bitfields in structures for m68k. - * Added Scatter/Gather and DMA support. - * Ver 0.4 Dec 7 97 Add support for ATAPI PD/CD drives. - * Use variable timeout for each command. - * Ver 0.5 Jan 2 98 Fix previous PD/CD support. - * Allow disabling of SCSI-6 to SCSI-10 transformation. - * Ver 0.6 Jan 27 98 Allow disabling of SCSI command translation layer - * for access through /dev/sg. - * Fix MODE_SENSE_6/MODE_SELECT_6/INQUIRY translation. - * Ver 0.7 Dec 04 98 Ignore commands where lun != 0 to avoid multiple - * detection of devices with CONFIG_SCSI_MULTI_LUN - * Ver 0.8 Feb 05 99 Optical media need translation too. Reverse 0.7. - * Ver 0.9 Jul 04 99 Fix a bug in SG_SET_TRANSFORM. - * Ver 0.91 Jun 10 02 Fix "off by one" error in transforms - * Ver 0.92 Dec 31 02 Implement new SCSI mid level API - */ - -#define IDESCSI_VERSION "0.92" - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/ioport.h> -#include <linux/blkdev.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/ide.h> -#include <linux/scatterlist.h> -#include <linux/delay.h> -#include <linux/mutex.h> -#include <linux/bitops.h> - -#include <asm/io.h> -#include <asm/uaccess.h> - -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_device.h> -#include <scsi/scsi_host.h> -#include <scsi/scsi_tcq.h> -#include <scsi/sg.h> - -#define IDESCSI_DEBUG_LOG 0 - -#if IDESCSI_DEBUG_LOG -#define debug_log(fmt, args...) \ - printk(KERN_INFO "ide-scsi: " fmt, ## args) -#else -#define debug_log(fmt, args...) do {} while (0) -#endif - -/* - * SCSI command transformation layer - */ -#define IDESCSI_SG_TRANSFORM 1 /* /dev/sg transformation */ - -/* - * Log flags - */ -#define IDESCSI_LOG_CMD 0 /* Log SCSI commands */ - -typedef struct ide_scsi_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct Scsi_Host *host; - - unsigned long transform; /* SCSI cmd translation layer */ - unsigned long log; /* log flags */ -} idescsi_scsi_t; - -static DEFINE_MUTEX(idescsi_ref_mutex); -/* Set by module param to skip cd */ -static int idescsi_nocd; - -#define ide_scsi_g(disk) \ - container_of((disk)->private_data, struct ide_scsi_obj, driver) - -static struct ide_scsi_obj *ide_scsi_get(struct gendisk *disk) -{ - struct ide_scsi_obj *scsi = NULL; - - mutex_lock(&idescsi_ref_mutex); - scsi = ide_scsi_g(disk); - if (scsi) { - if (ide_device_get(scsi->drive)) - scsi = NULL; - else - scsi_host_get(scsi->host); - } - mutex_unlock(&idescsi_ref_mutex); - return scsi; -} - -static void ide_scsi_put(struct ide_scsi_obj *scsi) -{ - ide_drive_t *drive = scsi->drive; - - mutex_lock(&idescsi_ref_mutex); - scsi_host_put(scsi->host); - ide_device_put(drive); - mutex_unlock(&idescsi_ref_mutex); -} - -static inline idescsi_scsi_t *scsihost_to_idescsi(struct Scsi_Host *host) -{ - return (idescsi_scsi_t*) (&host[1]); -} - -static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive) -{ - return scsihost_to_idescsi(ide_drive->driver_data); -} - -static void ide_scsi_hex_dump(u8 *data, int len) -{ - print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0); -} - -static int idescsi_end_request(ide_drive_t *, int, int); - -static void ide_scsi_callback(ide_drive_t *drive, int dsc) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct ide_atapi_pc *pc = drive->pc; - - if (pc->flags & PC_FLAG_TIMEDOUT) - debug_log("%s: got timed out packet %lu at %lu\n", __func__, - pc->scsi_cmd->serial_number, jiffies); - /* end this request now - scsi should retry it*/ - else if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk(KERN_INFO "Packet command completed, %d bytes" - " transferred\n", pc->xferred); - - idescsi_end_request(drive, 1, 0); -} - -static int idescsi_check_condition(ide_drive_t *drive, - struct request *failed_cmd) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct ide_atapi_pc *pc; - struct request *rq; - u8 *buf; - - /* stuff a sense request in front of our current request */ - pc = kzalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC); - rq = blk_get_request(drive->queue, READ, GFP_ATOMIC); - buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_ATOMIC); - if (!pc || !rq || !buf) { - kfree(buf); - if (rq) - blk_put_request(rq); - kfree(pc); - return -ENOMEM; - } - rq->special = (char *) pc; - pc->rq = rq; - pc->buf = buf; - pc->c[0] = REQUEST_SENSE; - pc->c[4] = pc->req_xfer = pc->buf_size = SCSI_SENSE_BUFFERSIZE; - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - pc->timeout = jiffies + WAIT_READY; - /* NOTE! Save the failed packet command in "rq->buffer" */ - rq->buffer = (void *) failed_cmd->special; - pc->scsi_cmd = ((struct ide_atapi_pc *) failed_cmd->special)->scsi_cmd; - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { - printk ("ide-scsi: %s: queue cmd = ", drive->name); - ide_scsi_hex_dump(pc->c, 6); - } - rq->rq_disk = scsi->disk; - rq->ref_count++; - memcpy(rq->cmd, pc->c, 12); - ide_do_drive_cmd(drive, rq); - return 0; -} - -static ide_startstop_t -idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) -{ - ide_hwif_t *hwif = drive->hwif; - - if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) - /* force an abort */ - hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); - - rq->errors++; - - idescsi_end_request(drive, 0, 0); - - return ide_stopped; -} - -static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - struct request *rq = HWGROUP(drive)->rq; - struct ide_atapi_pc *pc = (struct ide_atapi_pc *) rq->special; - int log = test_bit(IDESCSI_LOG_CMD, &scsi->log); - struct Scsi_Host *host; - int errors = rq->errors; - unsigned long flags; - - if (!blk_special_request(rq) && !blk_sense_request(rq)) { - ide_end_request(drive, uptodate, nrsecs); - return 0; - } - ide_end_drive_cmd (drive, 0, 0); - if (blk_sense_request(rq)) { - struct ide_atapi_pc *opc = (struct ide_atapi_pc *) rq->buffer; - if (log) { - printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number); - ide_scsi_hex_dump(pc->buf, 16); - } - memcpy((void *) opc->scsi_cmd->sense_buffer, pc->buf, - SCSI_SENSE_BUFFERSIZE); - kfree(pc->buf); - kfree(pc); - blk_put_request(rq); - pc = opc; - rq = pc->rq; - pc->scsi_cmd->result = (CHECK_CONDITION << 1) | - (((pc->flags & PC_FLAG_TIMEDOUT) ? - DID_TIME_OUT : - DID_OK) << 16); - } else if (pc->flags & PC_FLAG_TIMEDOUT) { - if (log) - printk (KERN_WARNING "ide-scsi: %s: timed out for %lu\n", - drive->name, pc->scsi_cmd->serial_number); - pc->scsi_cmd->result = DID_TIME_OUT << 16; - } else if (errors >= ERROR_MAX) { - pc->scsi_cmd->result = DID_ERROR << 16; - if (log) - printk ("ide-scsi: %s: I/O error for %lu\n", drive->name, pc->scsi_cmd->serial_number); - } else if (errors) { - if (log) - printk ("ide-scsi: %s: check condition for %lu\n", drive->name, pc->scsi_cmd->serial_number); - if (!idescsi_check_condition(drive, rq)) - /* we started a request sense, so we'll be back, exit for now */ - return 0; - pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16); - } else { - pc->scsi_cmd->result = DID_OK << 16; - } - host = pc->scsi_cmd->device->host; - spin_lock_irqsave(host->host_lock, flags); - pc->done(pc->scsi_cmd); - spin_unlock_irqrestore(host->host_lock, flags); - kfree(pc); - blk_put_request(rq); - drive->pc = NULL; - return 0; -} - -static inline int idescsi_set_direction(struct ide_atapi_pc *pc) -{ - switch (pc->c[0]) { - case READ_6: case READ_10: case READ_12: - pc->flags &= ~PC_FLAG_WRITING; - return 0; - case WRITE_6: case WRITE_10: case WRITE_12: - pc->flags |= PC_FLAG_WRITING; - return 0; - default: - return 1; - } -} - -static int idescsi_map_sg(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - ide_hwif_t *hwif = drive->hwif; - struct scatterlist *sg, *scsi_sg; - int segments; - - if (!pc->req_xfer || pc->req_xfer % 1024) - return 1; - - if (idescsi_set_direction(pc)) - return 1; - - sg = hwif->sg_table; - scsi_sg = scsi_sglist(pc->scsi_cmd); - segments = scsi_sg_count(pc->scsi_cmd); - - if (segments > hwif->sg_max_nents) - return 1; - - hwif->sg_nents = segments; - memcpy(sg, scsi_sg, sizeof(*sg) * segments); - - return 0; -} - -static ide_startstop_t idescsi_issue_pc(ide_drive_t *drive, - struct ide_atapi_pc *pc) -{ - /* Set the current packet command */ - drive->pc = pc; - - return ide_issue_pc(drive, ide_scsi_get_timeout(pc), ide_scsi_expiry); -} - -/* - * idescsi_do_request is our request handling function. - */ -static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *rq, sector_t block) -{ - debug_log("dev: %s, cmd: %x, errors: %d\n", rq->rq_disk->disk_name, - rq->cmd[0], rq->errors); - debug_log("sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n", - rq->sector, rq->nr_sectors, rq->current_nr_sectors); - - if (blk_sense_request(rq) || blk_special_request(rq)) { - struct ide_atapi_pc *pc = (struct ide_atapi_pc *)rq->special; - - if ((drive->dev_flags & IDE_DFLAG_USING_DMA) && - idescsi_map_sg(drive, pc) == 0) - pc->flags |= PC_FLAG_DMA_OK; - - return idescsi_issue_pc(drive, pc); - } - blk_dump_rq_flags(rq, "ide-scsi: unsup command"); - idescsi_end_request (drive, 0, 0); - return ide_stopped; -} - -#ifdef CONFIG_IDE_PROC_FS -static ide_proc_entry_t idescsi_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL }, - { NULL, 0, NULL, NULL } -}; - -#define ide_scsi_devset_get(name, field) \ -static int get_##name(ide_drive_t *drive) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - return scsi->field; \ -} - -#define ide_scsi_devset_set(name, field) \ -static int set_##name(ide_drive_t *drive, int arg) \ -{ \ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ - scsi->field = arg; \ - return 0; \ -} - -#define ide_scsi_devset_rw_field(_name, _field) \ -ide_scsi_devset_get(_name, _field); \ -ide_scsi_devset_set(_name, _field); \ -IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name); - -ide_devset_rw_field(bios_cyl, bios_cyl); -ide_devset_rw_field(bios_head, bios_head); -ide_devset_rw_field(bios_sect, bios_sect); - -ide_scsi_devset_rw_field(transform, transform); -ide_scsi_devset_rw_field(log, log); - -static const struct ide_proc_devset idescsi_settings[] = { - IDE_PROC_DEVSET(bios_cyl, 0, 1023), - IDE_PROC_DEVSET(bios_head, 0, 255), - IDE_PROC_DEVSET(bios_sect, 0, 63), - IDE_PROC_DEVSET(log, 0, 1), - IDE_PROC_DEVSET(transform, 0, 3), - { 0 }, -}; - -static ide_proc_entry_t *ide_scsi_proc_entries(ide_drive_t *drive) -{ - return idescsi_proc; -} - -static const struct ide_proc_devset *ide_scsi_proc_devsets(ide_drive_t *drive) -{ - return idescsi_settings; -} -#endif - -/* - * Driver initialization. - */ -static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) -{ - clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); -#if IDESCSI_DEBUG_LOG - set_bit(IDESCSI_LOG_CMD, &scsi->log); -#endif /* IDESCSI_DEBUG_LOG */ - - drive->pc_callback = ide_scsi_callback; - drive->pc_update_buffers = NULL; - drive->pc_io_buffers = ide_io_buffers; - - ide_proc_register_driver(drive, scsi->driver); -} - -static void ide_scsi_remove(ide_drive_t *drive) -{ - struct Scsi_Host *scsihost = drive->driver_data; - struct ide_scsi_obj *scsi = scsihost_to_idescsi(scsihost); - struct gendisk *g = scsi->disk; - - scsi_remove_host(scsihost); - ide_proc_unregister_driver(drive, scsi->driver); - - ide_unregister_region(g); - - drive->driver_data = NULL; - g->private_data = NULL; - put_disk(g); - - ide_scsi_put(scsi); - - drive->dev_flags &= ~IDE_DFLAG_SCSI; -} - -static int ide_scsi_probe(ide_drive_t *); - -static ide_driver_t idescsi_driver = { - .gen_driver = { - .owner = THIS_MODULE, - .name = "ide-scsi", - .bus = &ide_bus_type, - }, - .probe = ide_scsi_probe, - .remove = ide_scsi_remove, - .version = IDESCSI_VERSION, - .do_request = idescsi_do_request, - .end_request = idescsi_end_request, - .error = idescsi_atapi_error, -#ifdef CONFIG_IDE_PROC_FS - .proc_entries = ide_scsi_proc_entries, - .proc_devsets = ide_scsi_proc_devsets, -#endif -}; - -static int idescsi_ide_open(struct block_device *bdev, fmode_t mode) -{ - struct ide_scsi_obj *scsi = ide_scsi_get(bdev->bd_disk); - - if (!scsi) - return -ENXIO; - - return 0; -} - -static int idescsi_ide_release(struct gendisk *disk, fmode_t mode) -{ - ide_scsi_put(ide_scsi_g(disk)); - return 0; -} - -static int idescsi_ide_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct ide_scsi_obj *scsi = ide_scsi_g(bdev->bd_disk); - return generic_ide_ioctl(scsi->drive, bdev, cmd, arg); -} - -static struct block_device_operations idescsi_ops = { - .owner = THIS_MODULE, - .open = idescsi_ide_open, - .release = idescsi_ide_release, - .locked_ioctl = idescsi_ide_ioctl, -}; - -static int idescsi_slave_configure(struct scsi_device * sdp) -{ - /* Configure detected device */ - sdp->use_10_for_rw = 1; - sdp->use_10_for_ms = 1; - scsi_adjust_queue_depth(sdp, MSG_SIMPLE_TAG, sdp->host->cmd_per_lun); - return 0; -} - -static const char *idescsi_info (struct Scsi_Host *host) -{ - return "SCSI host adapter emulation for IDE ATAPI devices"; -} - -static int idescsi_ioctl (struct scsi_device *dev, int cmd, void __user *arg) -{ - idescsi_scsi_t *scsi = scsihost_to_idescsi(dev->host); - - if (cmd == SG_SET_TRANSFORM) { - if (arg) - set_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); - else - clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); - return 0; - } else if (cmd == SG_GET_TRANSFORM) - return put_user(test_bit(IDESCSI_SG_TRANSFORM, &scsi->transform), (int __user *) arg); - return -EINVAL; -} - -static int idescsi_queue (struct scsi_cmnd *cmd, - void (*done)(struct scsi_cmnd *)) -{ - struct Scsi_Host *host = cmd->device->host; - idescsi_scsi_t *scsi = scsihost_to_idescsi(host); - ide_drive_t *drive = scsi->drive; - struct request *rq = NULL; - struct ide_atapi_pc *pc = NULL; - int write = cmd->sc_data_direction == DMA_TO_DEVICE; - - if (!drive) { - scmd_printk (KERN_ERR, cmd, "drive not present\n"); - goto abort; - } - scsi = drive_to_idescsi(drive); - pc = kmalloc(sizeof(struct ide_atapi_pc), GFP_ATOMIC); - rq = blk_get_request(drive->queue, write, GFP_ATOMIC); - if (rq == NULL || pc == NULL) { - printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name); - goto abort; - } - - memset (pc->c, 0, 12); - pc->flags = 0; - if (cmd->sc_data_direction == DMA_TO_DEVICE) - pc->flags |= PC_FLAG_WRITING; - pc->rq = rq; - memcpy (pc->c, cmd->cmnd, cmd->cmd_len); - pc->buf = NULL; - pc->sg = scsi_sglist(cmd); - pc->sg_cnt = scsi_sg_count(cmd); - pc->b_count = 0; - pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); - pc->scsi_cmd = cmd; - pc->done = done; - pc->timeout = jiffies + cmd->request->timeout; - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { - printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); - ide_scsi_hex_dump(cmd->cmnd, cmd->cmd_len); - if (memcmp(pc->c, cmd->cmnd, cmd->cmd_len)) { - printk ("ide-scsi: %s: que %lu, tsl = ", drive->name, cmd->serial_number); - ide_scsi_hex_dump(pc->c, 12); - } - } - - rq->special = (char *) pc; - rq->cmd_type = REQ_TYPE_SPECIAL; - spin_unlock_irq(host->host_lock); - rq->ref_count++; - memcpy(rq->cmd, pc->c, 12); - blk_execute_rq_nowait(drive->queue, scsi->disk, rq, 0, NULL); - spin_lock_irq(host->host_lock); - return 0; -abort: - kfree (pc); - if (rq) - blk_put_request(rq); - cmd->result = DID_ERROR << 16; - done(cmd); - return 0; -} - -static int idescsi_eh_abort (struct scsi_cmnd *cmd) -{ - idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host); - ide_drive_t *drive = scsi->drive; - ide_hwif_t *hwif; - ide_hwgroup_t *hwgroup; - int busy; - int ret = FAILED; - - struct ide_atapi_pc *pc; - - /* In idescsi_eh_abort we try to gently pry our command from the ide subsystem */ - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: abort called for %lu\n", cmd->serial_number); - - if (!drive) { - printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_abort\n"); - WARN_ON(1); - goto no_drive; - } - - hwif = drive->hwif; - hwgroup = hwif->hwgroup; - - /* First give it some more time, how much is "right" is hard to say :-( - FIXME - uses mdelay which causes latency? */ - busy = ide_wait_not_busy(hwif, 100); - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: drive did%s become ready\n", busy?" not":""); - - spin_lock_irq(&hwgroup->lock); - - /* If there is no pc running we're done (our interrupt took care of it) */ - pc = drive->pc; - if (pc == NULL) { - ret = SUCCESS; - goto ide_unlock; - } - - /* It's somewhere in flight. Does ide subsystem agree? */ - if (pc->scsi_cmd->serial_number == cmd->serial_number && !busy && - elv_queue_empty(drive->queue) && HWGROUP(drive)->rq != pc->rq) { - /* - * FIXME - not sure this condition can ever occur - */ - printk (KERN_ERR "ide-scsi: cmd aborted!\n"); - - if (blk_sense_request(pc->rq)) - kfree(pc->buf); - /* we need to call blk_put_request twice. */ - blk_put_request(pc->rq); - blk_put_request(pc->rq); - kfree(pc); - drive->pc = NULL; - - ret = SUCCESS; - } - -ide_unlock: - spin_unlock_irq(&hwgroup->lock); -no_drive: - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: abort returns %s\n", ret == SUCCESS?"success":"failed"); - - return ret; -} - -static int idescsi_eh_reset (struct scsi_cmnd *cmd) -{ - struct request *req; - idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host); - ide_drive_t *drive = scsi->drive; - ide_hwgroup_t *hwgroup; - int ready = 0; - int ret = SUCCESS; - - struct ide_atapi_pc *pc; - - /* In idescsi_eh_reset we forcefully remove the command from the ide subsystem and reset the device. */ - - if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) - printk (KERN_WARNING "ide-scsi: reset called for %lu\n", cmd->serial_number); - - if (!drive) { - printk (KERN_WARNING "ide-scsi: Drive not set in idescsi_eh_reset\n"); - WARN_ON(1); - return FAILED; - } - - hwgroup = drive->hwif->hwgroup; - - spin_lock_irq(cmd->device->host->host_lock); - spin_lock(&hwgroup->lock); - - pc = drive->pc; - if (pc) - req = pc->rq; - - if (pc == NULL || req != hwgroup->rq || hwgroup->handler == NULL) { - printk (KERN_WARNING "ide-scsi: No active request in idescsi_eh_reset\n"); - spin_unlock(&hwgroup->lock); - spin_unlock_irq(cmd->device->host->host_lock); - return FAILED; - } - - /* kill current request */ - if (__blk_end_request(req, -EIO, 0)) - BUG(); - if (blk_sense_request(req)) - kfree(pc->buf); - kfree(pc); - drive->pc = NULL; - blk_put_request(req); - - /* now nuke the drive queue */ - while ((req = elv_next_request(drive->queue))) { - if (__blk_end_request(req, -EIO, 0)) - BUG(); - } - - hwgroup->rq = NULL; - hwgroup->handler = NULL; - hwgroup->busy = 1; /* will set this to zero when ide reset finished */ - spin_unlock(&hwgroup->lock); - - ide_do_reset(drive); - - /* ide_do_reset starts a polling handler which restarts itself every 50ms until the reset finishes */ - - do { - spin_unlock_irq(cmd->device->host->host_lock); - msleep(50); - spin_lock_irq(cmd->device->host->host_lock); - } while ( HWGROUP(drive)->handler ); - - ready = drive_is_ready(drive); - HWGROUP(drive)->busy--; - if (!ready) { - printk (KERN_ERR "ide-scsi: reset failed!\n"); - ret = FAILED; - } - - spin_unlock_irq(cmd->device->host->host_lock); - return ret; -} - -static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev, - sector_t capacity, int *parm) -{ - idescsi_scsi_t *idescsi = scsihost_to_idescsi(sdev->host); - ide_drive_t *drive = idescsi->drive; - - if (drive->bios_cyl && drive->bios_head && drive->bios_sect) { - parm[0] = drive->bios_head; - parm[1] = drive->bios_sect; - parm[2] = drive->bios_cyl; - } - return 0; -} - -static struct scsi_host_template idescsi_template = { - .module = THIS_MODULE, - .name = "idescsi", - .info = idescsi_info, - .slave_configure = idescsi_slave_configure, - .ioctl = idescsi_ioctl, - .queuecommand = idescsi_queue, - .eh_abort_handler = idescsi_eh_abort, - .eh_host_reset_handler = idescsi_eh_reset, - .bios_param = idescsi_bios, - .can_queue = 40, - .this_id = -1, - .sg_tablesize = 256, - .cmd_per_lun = 5, - .max_sectors = 128, - .use_clustering = DISABLE_CLUSTERING, - .emulated = 1, - .proc_name = "ide-scsi", -}; - -static int ide_scsi_probe(ide_drive_t *drive) -{ - idescsi_scsi_t *idescsi; - struct Scsi_Host *host; - struct gendisk *g; - static int warned; - int err = -ENOMEM; - u16 last_lun; - - if (!warned && drive->media == ide_cdrom) { - printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n"); - warned = 1; - } - - if (idescsi_nocd && drive->media == ide_cdrom) - return -ENODEV; - - if (!strstr("ide-scsi", drive->driver_req) || - drive->media == ide_disk || - !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) - return -ENODEV; - - drive->dev_flags |= IDE_DFLAG_SCSI; - - g = alloc_disk(1 << PARTN_BITS); - if (!g) - goto out_host_put; - - ide_init_disk(g, drive); - - host->max_id = 1; - - last_lun = drive->id[ATA_ID_LAST_LUN]; - if (last_lun) - debug_log("%s: last_lun=%u\n", drive->name, last_lun); - - if ((last_lun & 7) != 7) - host->max_lun = (last_lun & 7) + 1; - else - host->max_lun = 1; - - drive->driver_data = host; - idescsi = scsihost_to_idescsi(host); - idescsi->drive = drive; - idescsi->driver = &idescsi_driver; - idescsi->host = host; - idescsi->disk = g; - g->private_data = &idescsi->driver; - err = 0; - idescsi_setup(drive, idescsi); - g->fops = &idescsi_ops; - ide_register_region(g); - err = scsi_add_host(host, &drive->gendev); - if (!err) { - scsi_scan_host(host); - return 0; - } - /* fall through on error */ - ide_unregister_region(g); - ide_proc_unregister_driver(drive, &idescsi_driver); - - put_disk(g); -out_host_put: - drive->dev_flags &= ~IDE_DFLAG_SCSI; - scsi_host_put(host); - return err; -} - -static int __init init_idescsi_module(void) -{ - return driver_register(&idescsi_driver.gen_driver); -} - -static void __exit exit_idescsi_module(void) -{ - driver_unregister(&idescsi_driver.gen_driver); -} - -module_param(idescsi_nocd, int, 0600); -MODULE_PARM_DESC(idescsi_nocd, "Disable handling of CD-ROMs so they may be driven by ide-cd"); -module_init(init_idescsi_module); -module_exit(exit_idescsi_module); -MODULE_LICENSE("GPL"); diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 303272af386e..daa00567bc44 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -279,6 +279,13 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, + [PORT_OCTEON] = { + .name = "OCTEON", + .fifo_size = 64, + .tx_loadsz = 64, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .flags = UART_CAP_FIFO, + }, }; #if defined (CONFIG_SERIAL_8250_AU1X00) @@ -303,16 +310,16 @@ static const u8 au_io_out_map[] = { }; /* sane hardware needs no mapping */ -static inline int map_8250_in_reg(struct uart_8250_port *up, int offset) +static inline int map_8250_in_reg(struct uart_port *p, int offset) { - if (up->port.iotype != UPIO_AU) + if (p->iotype != UPIO_AU) return offset; return au_io_in_map[offset]; } -static inline int map_8250_out_reg(struct uart_8250_port *up, int offset) +static inline int map_8250_out_reg(struct uart_port *p, int offset) { - if (up->port.iotype != UPIO_AU) + if (p->iotype != UPIO_AU) return offset; return au_io_out_map[offset]; } @@ -341,16 +348,16 @@ static const u8 [UART_SCR] = 0x2c }; -static inline int map_8250_in_reg(struct uart_8250_port *up, int offset) +static inline int map_8250_in_reg(struct uart_port *p, int offset) { - if (up->port.iotype != UPIO_RM9000) + if (p->iotype != UPIO_RM9000) return offset; return regmap_in[offset]; } -static inline int map_8250_out_reg(struct uart_8250_port *up, int offset) +static inline int map_8250_out_reg(struct uart_port *p, int offset) { - if (up->port.iotype != UPIO_RM9000) + if (p->iotype != UPIO_RM9000) return offset; return regmap_out[offset]; } @@ -363,108 +370,170 @@ static inline int map_8250_out_reg(struct uart_8250_port *up, int offset) #endif -static unsigned int serial_in(struct uart_8250_port *up, int offset) +static unsigned int hub6_serial_in(struct uart_port *p, int offset) { - unsigned int tmp; - offset = map_8250_in_reg(up, offset) << up->port.regshift; + offset = map_8250_in_reg(p, offset) << p->regshift; + outb(p->hub6 - 1 + offset, p->iobase); + return inb(p->iobase + 1); +} - switch (up->port.iotype) { - case UPIO_HUB6: - outb(up->port.hub6 - 1 + offset, up->port.iobase); - return inb(up->port.iobase + 1); +static void hub6_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + outb(p->hub6 - 1 + offset, p->iobase); + outb(value, p->iobase + 1); +} - case UPIO_MEM: - case UPIO_DWAPB: - return readb(up->port.membase + offset); +static unsigned int mem_serial_in(struct uart_port *p, int offset) +{ + offset = map_8250_in_reg(p, offset) << p->regshift; + return readb(p->membase + offset); +} - case UPIO_RM9000: - case UPIO_MEM32: - return readl(up->port.membase + offset); +static void mem_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + writeb(value, p->membase + offset); +} + +static void mem32_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + writel(value, p->membase + offset); +} + +static unsigned int mem32_serial_in(struct uart_port *p, int offset) +{ + offset = map_8250_in_reg(p, offset) << p->regshift; + return readl(p->membase + offset); +} #ifdef CONFIG_SERIAL_8250_AU1X00 - case UPIO_AU: - return __raw_readl(up->port.membase + offset); +static unsigned int au_serial_in(struct uart_port *p, int offset) +{ + offset = map_8250_in_reg(p, offset) << p->regshift; + return __raw_readl(p->membase + offset); +} + +static void au_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + __raw_writel(value, p->membase + offset); +} #endif - case UPIO_TSI: - if (offset == UART_IIR) { - tmp = readl(up->port.membase + (UART_IIR & ~3)); - return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */ - } else - return readb(up->port.membase + offset); +static unsigned int tsi_serial_in(struct uart_port *p, int offset) +{ + unsigned int tmp; + offset = map_8250_in_reg(p, offset) << p->regshift; + if (offset == UART_IIR) { + tmp = readl(p->membase + (UART_IIR & ~3)); + return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */ + } else + return readb(p->membase + offset); +} - default: - return inb(up->port.iobase + offset); - } +static void tsi_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + if (!((offset == UART_IER) && (value & UART_IER_UUE))) + writeb(value, p->membase + offset); } -static void -serial_out(struct uart_8250_port *up, int offset, int value) +static void dwapb_serial_out(struct uart_port *p, int offset, int value) { - /* Save the offset before it's remapped */ int save_offset = offset; - offset = map_8250_out_reg(up, offset) << up->port.regshift; + offset = map_8250_out_reg(p, offset) << p->regshift; + /* Save the LCR value so it can be re-written when a + * Busy Detect interrupt occurs. */ + if (save_offset == UART_LCR) { + struct uart_8250_port *up = (struct uart_8250_port *)p; + up->lcr = value; + } + writeb(value, p->membase + offset); + /* Read the IER to ensure any interrupt is cleared before + * returning from ISR. */ + if (save_offset == UART_TX || save_offset == UART_IER) + value = p->serial_in(p, UART_IER); +} - switch (up->port.iotype) { +static unsigned int io_serial_in(struct uart_port *p, int offset) +{ + offset = map_8250_in_reg(p, offset) << p->regshift; + return inb(p->iobase + offset); +} + +static void io_serial_out(struct uart_port *p, int offset, int value) +{ + offset = map_8250_out_reg(p, offset) << p->regshift; + outb(value, p->iobase + offset); +} + +static void set_io_from_upio(struct uart_port *p) +{ + switch (p->iotype) { case UPIO_HUB6: - outb(up->port.hub6 - 1 + offset, up->port.iobase); - outb(value, up->port.iobase + 1); + p->serial_in = hub6_serial_in; + p->serial_out = hub6_serial_out; break; case UPIO_MEM: - writeb(value, up->port.membase + offset); + p->serial_in = mem_serial_in; + p->serial_out = mem_serial_out; break; case UPIO_RM9000: case UPIO_MEM32: - writel(value, up->port.membase + offset); + p->serial_in = mem32_serial_in; + p->serial_out = mem32_serial_out; break; #ifdef CONFIG_SERIAL_8250_AU1X00 case UPIO_AU: - __raw_writel(value, up->port.membase + offset); + p->serial_in = au_serial_in; + p->serial_out = au_serial_out; break; #endif case UPIO_TSI: - if (!((offset == UART_IER) && (value & UART_IER_UUE))) - writeb(value, up->port.membase + offset); + p->serial_in = tsi_serial_in; + p->serial_out = tsi_serial_out; break; case UPIO_DWAPB: - /* Save the LCR value so it can be re-written when a - * Busy Detect interrupt occurs. */ - if (save_offset == UART_LCR) - up->lcr = value; - writeb(value, up->port.membase + offset); - /* Read the IER to ensure any interrupt is cleared before - * returning from ISR. */ - if (save_offset == UART_TX || save_offset == UART_IER) - value = serial_in(up, UART_IER); + p->serial_in = mem_serial_in; + p->serial_out = dwapb_serial_out; break; default: - outb(value, up->port.iobase + offset); + p->serial_in = io_serial_in; + p->serial_out = io_serial_out; + break; } } static void serial_out_sync(struct uart_8250_port *up, int offset, int value) { - switch (up->port.iotype) { + struct uart_port *p = &up->port; + switch (p->iotype) { case UPIO_MEM: case UPIO_MEM32: #ifdef CONFIG_SERIAL_8250_AU1X00 case UPIO_AU: #endif case UPIO_DWAPB: - serial_out(up, offset, value); - serial_in(up, UART_LCR); /* safe, no side-effects */ + p->serial_out(p, offset, value); + p->serial_in(p, UART_LCR); /* safe, no side-effects */ break; default: - serial_out(up, offset, value); + p->serial_out(p, offset, value); } } +#define serial_in(up, offset) \ + (up->port.serial_in(&(up)->port, (offset))) +#define serial_out(up, offset, value) \ + (up->port.serial_out(&(up)->port, (offset), (value))) /* * We used to support using pause I/O for certain machines. We * haven't supported this for a while, but just in case it's badly @@ -2576,6 +2645,7 @@ static void __init serial8250_isa_init_ports(void) up->port.membase = old_serial_port[i].iomem_base; up->port.iotype = old_serial_port[i].io_type; up->port.regshift = old_serial_port[i].iomem_reg_shift; + set_io_from_upio(&up->port); if (share_irqs) up->port.flags |= UPF_SHARE_IRQ; } @@ -2752,12 +2822,30 @@ static struct uart_driver serial8250_reg = { */ int __init early_serial_setup(struct uart_port *port) { + struct uart_port *p; + if (port->line >= ARRAY_SIZE(serial8250_ports)) return -ENODEV; serial8250_isa_init_ports(); - serial8250_ports[port->line].port = *port; - serial8250_ports[port->line].port.ops = &serial8250_pops; + p = &serial8250_ports[port->line].port; + p->iobase = port->iobase; + p->membase = port->membase; + p->irq = port->irq; + p->uartclk = port->uartclk; + p->fifosize = port->fifosize; + p->regshift = port->regshift; + p->iotype = port->iotype; + p->flags = port->flags; + p->mapbase = port->mapbase; + p->private_data = port->private_data; + + set_io_from_upio(p); + if (port->serial_in) + p->serial_in = port->serial_in; + if (port->serial_out) + p->serial_out = port->serial_out; + return 0; } @@ -2822,6 +2910,9 @@ static int __devinit serial8250_probe(struct platform_device *dev) port.mapbase = p->mapbase; port.hub6 = p->hub6; port.private_data = p->private_data; + port.type = p->type; + port.serial_in = p->serial_in; + port.serial_out = p->serial_out; port.dev = &dev->dev; if (share_irqs) port.flags |= UPF_SHARE_IRQ; @@ -2976,6 +3067,20 @@ int serial8250_register_port(struct uart_port *port) if (port->dev) uart->port.dev = port->dev; + if (port->flags & UPF_FIXED_TYPE) { + uart->port.type = port->type; + uart->port.fifosize = uart_config[port->type].fifo_size; + uart->capabilities = uart_config[port->type].flags; + uart->tx_loadsz = uart_config[port->type].tx_loadsz; + } + + set_io_from_upio(&uart->port); + /* Possibly override default I/O functions. */ + if (port->serial_in) + uart->port.serial_in = port->serial_in; + if (port->serial_out) + uart->port.serial_out = port->serial_out; + ret = uart_add_one_port(&serial8250_reg, &uart->port); if (ret == 0) ret = uart->port.line; diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 5450a0e5ecdb..c088146b7513 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -42,7 +42,8 @@ struct pci_serial_quirk { u32 subvendor; u32 subdevice; int (*init)(struct pci_dev *dev); - int (*setup)(struct serial_private *, struct pciserial_board *, + int (*setup)(struct serial_private *, + const struct pciserial_board *, struct uart_port *, int); void (*exit)(struct pci_dev *dev); }; @@ -107,7 +108,7 @@ setup_port(struct serial_private *priv, struct uart_port *port, * ADDI-DATA GmbH communication cards <info@addi-data.com> */ static int addidata_apci7800_setup(struct serial_private *priv, - struct pciserial_board *board, + const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar = 0, offset = board->first_offset; @@ -134,7 +135,7 @@ static int addidata_apci7800_setup(struct serial_private *priv, * Not that ugly ;) -- HW */ static int -afavlab_setup(struct serial_private *priv, struct pciserial_board *board, +afavlab_setup(struct serial_private *priv, const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -188,8 +189,9 @@ static int pci_hp_diva_init(struct pci_dev *dev) * some serial ports are supposed to be hidden on certain models. */ static int -pci_hp_diva_setup(struct serial_private *priv, struct pciserial_board *board, - struct uart_port *port, int idx) +pci_hp_diva_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_port *port, int idx) { unsigned int offset = board->first_offset; unsigned int bar = FL_GET_BASE(board->flags); @@ -306,7 +308,7 @@ static void __devexit pci_plx9050_exit(struct pci_dev *dev) /* SBS Technologies Inc. PMC-OCTPRO and P-OCTAL cards */ static int -sbs_setup(struct serial_private *priv, struct pciserial_board *board, +sbs_setup(struct serial_private *priv, const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -463,7 +465,7 @@ static int pci_siig_init(struct pci_dev *dev) } static int pci_siig_setup(struct serial_private *priv, - struct pciserial_board *board, + const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar = FL_GET_BASE(board->flags) + idx, offset = 0; @@ -534,7 +536,8 @@ static int pci_timedia_init(struct pci_dev *dev) * Ugh, this is ugly as all hell --- TYT */ static int -pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board, +pci_timedia_setup(struct serial_private *priv, + const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar = 0, offset = board->first_offset; @@ -568,7 +571,7 @@ pci_timedia_setup(struct serial_private *priv, struct pciserial_board *board, */ static int titan_400l_800l_setup(struct serial_private *priv, - struct pciserial_board *board, + const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset; @@ -737,8 +740,41 @@ static void __devexit pci_ite887x_exit(struct pci_dev *dev) release_region(ioport, ITE_887x_IOSIZE); } +/* + * Oxford Semiconductor Inc. + * Check that device is part of the Tornado range of devices, then determine + * the number of ports available on the device. + */ +static int pci_oxsemi_tornado_init(struct pci_dev *dev) +{ + u8 __iomem *p; + unsigned long deviceID; + unsigned int number_uarts = 0; + + /* OxSemi Tornado devices are all 0xCxxx */ + if (dev->vendor == PCI_VENDOR_ID_OXSEMI && + (dev->device & 0xF000) != 0xC000) + return 0; + + p = pci_iomap(dev, 0, 5); + if (p == NULL) + return -ENOMEM; + + deviceID = ioread32(p); + /* Tornado device */ + if (deviceID == 0x07000200) { + number_uarts = ioread8(p + 4); + printk(KERN_DEBUG + "%d ports detected on Oxford PCI Express device\n", + number_uarts); + } + pci_iounmap(dev, p); + return number_uarts; +} + static int -pci_default_setup(struct serial_private *priv, struct pciserial_board *board, +pci_default_setup(struct serial_private *priv, + const struct pciserial_board *board, struct uart_port *port, int idx) { unsigned int bar, offset = board->first_offset, maxnr; @@ -1018,6 +1054,25 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = pci_default_setup, }, /* + * For Oxford Semiconductor and Mainpine + */ + { + .vendor = PCI_VENDOR_ID_OXSEMI, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_oxsemi_tornado_init, + .setup = pci_default_setup, + }, + { + .vendor = PCI_VENDOR_ID_MAINPINE, + .device = PCI_ANY_ID, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .init = pci_oxsemi_tornado_init, + .setup = pci_default_setup, + }, + /* * Default "match everything" terminator entry */ { @@ -1048,7 +1103,7 @@ static struct pci_serial_quirk *find_quirk(struct pci_dev *dev) } static inline int get_pci_irq(struct pci_dev *dev, - struct pciserial_board *board) + const struct pciserial_board *board) { if (board->flags & FL_NOIRQ) return 0; @@ -1843,8 +1898,8 @@ serial_pci_guess_board(struct pci_dev *dev, struct pciserial_board *board) } static inline int -serial_pci_matches(struct pciserial_board *board, - struct pciserial_board *guessed) +serial_pci_matches(const struct pciserial_board *board, + const struct pciserial_board *guessed) { return board->num_ports == guessed->num_ports && @@ -1854,54 +1909,14 @@ serial_pci_matches(struct pciserial_board *board, board->first_offset == guessed->first_offset; } -/* - * Oxford Semiconductor Inc. - * Check that device is part of the Tornado range of devices, then determine - * the number of ports available on the device. - */ -static int pci_oxsemi_tornado_init(struct pci_dev *dev, struct pciserial_board *board) -{ - u8 __iomem *p; - unsigned long deviceID; - unsigned int number_uarts; - - /* OxSemi Tornado devices are all 0xCxxx */ - if (dev->vendor == PCI_VENDOR_ID_OXSEMI && - (dev->device & 0xF000) != 0xC000) - return 0; - - p = pci_iomap(dev, 0, 5); - if (p == NULL) - return -ENOMEM; - - deviceID = ioread32(p); - /* Tornado device */ - if (deviceID == 0x07000200) { - number_uarts = ioread8(p + 4); - board->num_ports = number_uarts; - printk(KERN_DEBUG - "%d ports detected on Oxford PCI Express device\n", - number_uarts); - } - pci_iounmap(dev, p); - return 0; -} - struct serial_private * -pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board) +pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board) { struct uart_port serial_port; struct serial_private *priv; struct pci_serial_quirk *quirk; int rc, nr_ports, i; - /* - * Find number of ports on board - */ - if (dev->vendor == PCI_VENDOR_ID_OXSEMI || - dev->vendor == PCI_VENDOR_ID_MAINPINE) - pci_oxsemi_tornado_init(dev, board); - nr_ports = board->num_ports; /* @@ -2028,7 +2043,8 @@ static int __devinit pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) { struct serial_private *priv; - struct pciserial_board *board, tmp; + const struct pciserial_board *board; + struct pciserial_board tmp; int rc; if (ent->driver_data >= ARRAY_SIZE(pci_boards)) { @@ -2055,7 +2071,7 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) * We matched one of our class entries. Try to * determine the parameters of this board. */ - rc = serial_pci_guess_board(dev, board); + rc = serial_pci_guess_board(dev, &tmp); if (rc) goto disable; } else { @@ -2271,6 +2287,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_COMM8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_8_115200 }, + { PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_7803, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, + pbn_b2_8_460800 }, { PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_UCOMM8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_8_115200 }, @@ -2372,6 +2391,9 @@ static struct pci_device_id serial_pci_tbl[] = { * For now just used the hex ID 0x950a. */ { PCI_VENDOR_ID_OXSEMI, 0x950a, + PCI_SUBVENDOR_ID_SIIG, PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL, 0, 0, + pbn_b0_2_115200 }, + { PCI_VENDOR_ID_OXSEMI, 0x950a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_2_1130000 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index 569f0e2476c6..318d69dce8e1 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -22,7 +22,8 @@ #include <linux/tty_flip.h> #include <linux/serial_core.h> -#ifdef CONFIG_KGDB_UART +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) #include <linux/kgdb.h> #include <asm/irq_regs.h> #endif @@ -45,6 +46,16 @@ static struct bfin_serial_port bfin_serial_ports[BFIN_UART_NR_PORTS]; static int nr_active_ports = ARRAY_SIZE(bfin_serial_resource); +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) + +# ifndef CONFIG_SERIAL_BFIN_PIO +# error KGDB only support UART in PIO mode. +# endif + +static int kgdboc_port_line; +static int kgdboc_break_enabled; +#endif /* * Setup for console. Argument comes from the menuconfig */ @@ -62,13 +73,17 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart); static void bfin_serial_mctrl_check(struct bfin_serial_port *uart); +static void bfin_serial_reset_irda(struct uart_port *port); + /* * interrupts are disabled on entry */ static void bfin_serial_stop_tx(struct uart_port *port) { struct bfin_serial_port *uart = (struct bfin_serial_port *)port; +#ifdef CONFIG_SERIAL_BFIN_DMA struct circ_buf *xmit = &uart->port.info->xmit; +#endif while (!(UART_GET_LSR(uart) & TEMT)) cpu_relax(); @@ -94,6 +109,14 @@ static void bfin_serial_stop_tx(struct uart_port *port) static void bfin_serial_start_tx(struct uart_port *port) { struct bfin_serial_port *uart = (struct bfin_serial_port *)port; + struct tty_struct *tty = uart->port.info->port.tty; + + /* + * To avoid losting RX interrupt, we reset IR function + * before sending data. + */ + if (tty->termios->c_line == N_IRDA) + bfin_serial_reset_irda(port); #ifdef CONFIG_SERIAL_BFIN_DMA if (uart->tx_done) @@ -110,9 +133,7 @@ static void bfin_serial_start_tx(struct uart_port *port) static void bfin_serial_stop_rx(struct uart_port *port) { struct bfin_serial_port *uart = (struct bfin_serial_port *)port; -#ifdef CONFIG_KGDB_UART - if (uart->port.line != CONFIG_KGDB_UART_PORT) -#endif + UART_CLEAR_IER(uart, ERBFI); } @@ -123,49 +144,6 @@ static void bfin_serial_enable_ms(struct uart_port *port) { } -#ifdef CONFIG_KGDB_UART -static int kgdb_entry_state; - -void kgdb_put_debug_char(int chr) -{ - struct bfin_serial_port *uart; - - if (CONFIG_KGDB_UART_PORT < 0 - || CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS) - uart = &bfin_serial_ports[0]; - else - uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT]; - - while (!(UART_GET_LSR(uart) & THRE)) { - SSYNC(); - } - - UART_CLEAR_DLAB(uart); - UART_PUT_CHAR(uart, (unsigned char)chr); - SSYNC(); -} - -int kgdb_get_debug_char(void) -{ - struct bfin_serial_port *uart; - unsigned char chr; - - if (CONFIG_KGDB_UART_PORT < 0 - || CONFIG_KGDB_UART_PORT >= BFIN_UART_NR_PORTS) - uart = &bfin_serial_ports[0]; - else - uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT]; - - while(!(UART_GET_LSR(uart) & DR)) { - SSYNC(); - } - UART_CLEAR_DLAB(uart); - chr = UART_GET_CHAR(uart); - SSYNC(); - - return chr; -} -#endif #if ANOMALY_05000363 && defined(CONFIG_SERIAL_BFIN_PIO) # define UART_GET_ANOMALY_THRESHOLD(uart) ((uart)->anomaly_threshold) @@ -178,7 +156,7 @@ int kgdb_get_debug_char(void) #ifdef CONFIG_SERIAL_BFIN_PIO static void bfin_serial_rx_chars(struct bfin_serial_port *uart) { - struct tty_struct *tty = uart->port.info->port.tty; + struct tty_struct *tty = NULL; unsigned int status, ch, flg; static struct timeval anomaly_start = { .tv_sec = 0 }; @@ -188,27 +166,18 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart) ch = UART_GET_CHAR(uart); uart->port.icount.rx++; -#ifdef CONFIG_KGDB_UART - if (uart->port.line == CONFIG_KGDB_UART_PORT) { - struct pt_regs *regs = get_irq_regs(); - if (uart->port.cons->index == CONFIG_KGDB_UART_PORT && ch == 0x1) { /* Ctrl + A */ - kgdb_breakkey_pressed(regs); - return; - } else if (kgdb_entry_state == 0 && ch == '$') {/* connection from KGDB */ - kgdb_entry_state = 1; - } else if (kgdb_entry_state == 1 && ch == 'q') { - kgdb_entry_state = 0; - kgdb_breakkey_pressed(regs); - return; - } else if (ch == 0x3) {/* Ctrl + C */ - kgdb_entry_state = 0; - kgdb_breakkey_pressed(regs); +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) + if (kgdb_connected && kgdboc_port_line == uart->port.line) + if (ch == 0x3) {/* Ctrl + C */ + kgdb_breakpoint(); return; - } else { - kgdb_entry_state = 0; } - } + + if (!uart->port.info || !uart->port.info->tty) + return; #endif + tty = uart->port.info->tty; if (ANOMALY_05000363) { /* The BF533 (and BF561) family of processors have a nice anomaly @@ -250,6 +219,7 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart) return; known_good_char: + status &= ~BI; anomaly_start.tv_sec = 0; } } @@ -445,7 +415,9 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart) void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart) { - int x_pos, pos; + int x_pos, pos, flags; + + spin_lock_irqsave(&uart->port.lock, flags); uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); x_pos = get_dma_curr_xcount(uart->rx_dma_channel); @@ -463,6 +435,8 @@ void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart) uart->rx_dma_buf.tail = uart->rx_dma_buf.head; } + spin_unlock_irqrestore(&uart->port.lock, flags); + mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES); } @@ -497,10 +471,9 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id) spin_lock(&uart->port.lock); irqstat = get_dma_curr_irqstat(uart->rx_dma_channel); clear_dma_irqstat(uart->rx_dma_channel); + bfin_serial_dma_rx_chars(uart); spin_unlock(&uart->port.lock); - mod_timer(&(uart->rx_dma_timer), jiffies); - return IRQ_HANDLED; } #endif @@ -630,16 +603,16 @@ static int bfin_serial_startup(struct uart_port *port) uart->rx_dma_timer.expires = jiffies + DMA_RX_FLUSH_JIFFIES; add_timer(&(uart->rx_dma_timer)); #else +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) + if (kgdboc_port_line == uart->port.line && kgdboc_break_enabled) + kgdboc_break_enabled = 0; + else { +# endif if (request_irq(uart->port.irq, bfin_serial_rx_int, IRQF_DISABLED, "BFIN_UART_RX", uart)) { -# ifdef CONFIG_KGDB_UART - if (uart->port.line != CONFIG_KGDB_UART_PORT) { -# endif printk(KERN_NOTICE "Unable to attach BlackFin UART RX interrupt\n"); return -EBUSY; -# ifdef CONFIG_KGDB_UART - } -# endif } if (request_irq @@ -685,6 +658,10 @@ static int bfin_serial_startup(struct uart_port *port) } } # endif +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) + } +# endif #endif UART_SET_IER(uart, ERBFI); return 0; @@ -716,9 +693,6 @@ static void bfin_serial_shutdown(struct uart_port *port) break; }; #endif -#ifdef CONFIG_KGDB_UART - if (uart->port.line != CONFIG_KGDB_UART_PORT) -#endif free_irq(uart->port.irq, uart); free_irq(uart->port.irq+1, uart); #endif @@ -887,6 +861,65 @@ static void bfin_serial_set_ldisc(struct uart_port *port) } } +#ifdef CONFIG_CONSOLE_POLL +static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr) +{ + struct bfin_serial_port *uart = (struct bfin_serial_port *)port; + + while (!(UART_GET_LSR(uart) & THRE)) + cpu_relax(); + + UART_CLEAR_DLAB(uart); + UART_PUT_CHAR(uart, (unsigned char)chr); +} + +static int bfin_serial_poll_get_char(struct uart_port *port) +{ + struct bfin_serial_port *uart = (struct bfin_serial_port *)port; + unsigned char chr; + + while (!(UART_GET_LSR(uart) & DR)) + cpu_relax(); + + UART_CLEAR_DLAB(uart); + chr = UART_GET_CHAR(uart); + + return chr; +} +#endif + +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) +static void bfin_kgdboc_port_shutdown(struct uart_port *port) +{ + if (kgdboc_break_enabled) { + kgdboc_break_enabled = 0; + bfin_serial_shutdown(port); + } +} + +static int bfin_kgdboc_port_startup(struct uart_port *port) +{ + kgdboc_port_line = port->line; + kgdboc_break_enabled = !bfin_serial_startup(port); + return 0; +} +#endif + +static void bfin_serial_reset_irda(struct uart_port *port) +{ + int line = port->line; + unsigned short val; + + val = UART_GET_GCTL(&bfin_serial_ports[line]); + val &= ~(IREN | RPOLC); + UART_PUT_GCTL(&bfin_serial_ports[line], val); + SSYNC(); + val |= (IREN | RPOLC); + UART_PUT_GCTL(&bfin_serial_ports[line], val); + SSYNC(); +} + static struct uart_ops bfin_serial_pops = { .tx_empty = bfin_serial_tx_empty, .set_mctrl = bfin_serial_set_mctrl, @@ -905,6 +938,15 @@ static struct uart_ops bfin_serial_pops = { .request_port = bfin_serial_request_port, .config_port = bfin_serial_config_port, .verify_port = bfin_serial_verify_port, +#if defined(CONFIG_KGDB_SERIAL_CONSOLE) || \ + defined(CONFIG_KGDB_SERIAL_CONSOLE_MODULE) + .kgdboc_port_startup = bfin_kgdboc_port_startup, + .kgdboc_port_shutdown = bfin_kgdboc_port_shutdown, +#endif +#ifdef CONFIG_CONSOLE_POLL + .poll_put_char = bfin_serial_poll_put_char, + .poll_get_char = bfin_serial_poll_get_char, +#endif }; static void __init bfin_serial_init_ports(void) @@ -950,7 +992,7 @@ static void __init bfin_serial_init_ports(void) } -#ifdef CONFIG_SERIAL_BFIN_CONSOLE +#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK) /* * If the port was already initialised (eg, by a boot loader), * try to determine the current setup. @@ -994,24 +1036,20 @@ bfin_serial_console_get_options(struct bfin_serial_port *uart, int *baud, } pr_debug("%s:baud = %d, parity = %c, bits= %d\n", __func__, *baud, *parity, *bits); } -#endif -#if defined(CONFIG_SERIAL_BFIN_CONSOLE) || defined(CONFIG_EARLY_PRINTK) static struct uart_driver bfin_serial_reg; static int __init bfin_serial_console_setup(struct console *co, char *options) { struct bfin_serial_port *uart; -# ifdef CONFIG_SERIAL_BFIN_CONSOLE int baud = 57600; int bits = 8; int parity = 'n'; -# ifdef CONFIG_SERIAL_BFIN_CTSRTS +# ifdef CONFIG_SERIAL_BFIN_CTSRTS int flow = 'r'; -# else +# else int flow = 'n'; -# endif # endif /* @@ -1023,16 +1061,12 @@ bfin_serial_console_setup(struct console *co, char *options) co->index = 0; uart = &bfin_serial_ports[co->index]; -# ifdef CONFIG_SERIAL_BFIN_CONSOLE if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); else bfin_serial_console_get_options(uart, &baud, &parity, &bits); return uart_set_options(&uart->port, co, baud, parity, bits, flow); -# else - return 0; -# endif } #endif /* defined (CONFIG_SERIAL_BFIN_CONSOLE) || defined (CONFIG_EARLY_PRINTK) */ @@ -1076,10 +1110,7 @@ static int __init bfin_serial_rs_console_init(void) { bfin_serial_init_ports(); register_console(&bfin_serial_console); -#ifdef CONFIG_KGDB_UART - kgdb_entry_state = 0; - init_kgdb_uart(); -#endif + return 0; } console_initcall(bfin_serial_rs_console_init); @@ -1144,7 +1175,7 @@ struct console __init *bfin_earlyserial_init(unsigned int port, return &bfin_early_serial_console; } -#endif /* CONFIG_SERIAL_BFIN_CONSOLE */ +#endif /* CONFIG_EARLY_PRINTK */ static struct uart_driver bfin_serial_reg = { .owner = THIS_MODULE, @@ -1235,10 +1266,6 @@ static struct platform_driver bfin_serial_driver = { static int __init bfin_serial_init(void) { int ret; -#ifdef CONFIG_KGDB_UART - struct bfin_serial_port *uart = &bfin_serial_ports[CONFIG_KGDB_UART_PORT]; - struct ktermios t; -#endif pr_info("Serial: Blackfin serial driver\n"); @@ -1252,21 +1279,6 @@ static int __init bfin_serial_init(void) uart_unregister_driver(&bfin_serial_reg); } } -#ifdef CONFIG_KGDB_UART - if (uart->port.cons->index != CONFIG_KGDB_UART_PORT) { - request_irq(uart->port.irq, bfin_serial_rx_int, - IRQF_DISABLED, "BFIN_UART_RX", uart); - pr_info("Request irq for kgdb uart port\n"); - UART_SET_IER(uart, ERBFI); - SSYNC(); - t.c_cflag = CS8|B57600; - t.c_iflag = 0; - t.c_oflag = 0; - t.c_lflag = ICANON; - t.c_line = CONFIG_KGDB_UART_PORT; - bfin_serial_set_termios(&uart->port, &t, &t); - } -#endif return ret; } @@ -1276,6 +1288,7 @@ static void __exit bfin_serial_exit(void) uart_unregister_driver(&bfin_serial_reg); } + module_init(bfin_serial_init); module_exit(bfin_serial_exit); diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c index dd8564d25051..529c0ff7952c 100644 --- a/drivers/serial/bfin_sport_uart.c +++ b/drivers/serial/bfin_sport_uart.c @@ -99,7 +99,7 @@ static void sport_stop_tx(struct uart_port *port); static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) { - pr_debug("%s value:%x\n", __FUNCTION__, value); + pr_debug("%s value:%x\n", __func__, value); /* Place a Start and Stop bit */ __asm__ volatile ( "R2 = b#01111111100;\n\t" @@ -110,7 +110,7 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) :"=r"(value) :"0"(value) :"R2", "R3"); - pr_debug("%s value:%x\n", __FUNCTION__, value); + pr_debug("%s value:%x\n", __func__, value); SPORT_PUT_TX(up, value); } @@ -120,7 +120,7 @@ static inline unsigned int rx_one_byte(struct sport_uart_port *up) unsigned int value, extract; value = SPORT_GET_RX32(up); - pr_debug("%s value:%x\n", __FUNCTION__, value); + pr_debug("%s value:%x\n", __func__, value); /* Extract 8 bits data */ __asm__ volatile ( @@ -151,12 +151,12 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate) /* Set TCR1 and TCR2 */ SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK)); SPORT_PUT_TCR2(up, 10); - pr_debug("%s TCR1:%x, TCR2:%x\n", __FUNCTION__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); + pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); /* Set RCR1 and RCR2 */ SPORT_PUT_RCR1(up, (RCKFE | LARFS | LRFS | RFSR | IRCLK)); SPORT_PUT_RCR2(up, 28); - pr_debug("%s RCR1:%x, RCR2:%x\n", __FUNCTION__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up)); + pr_debug("%s RCR1:%x, RCR2:%x\n", __func__, SPORT_GET_RCR1(up), SPORT_GET_RCR2(up)); tclkdiv = sclk/(2 * baud_rate) - 1; tfsdiv = 12; @@ -166,7 +166,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate) SPORT_PUT_RCLKDIV(up, rclkdiv); SSYNC(); pr_debug("%s sclk:%d, baud_rate:%d, tclkdiv:%d, tfsdiv:%d, rclkdiv:%d\n", - __FUNCTION__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv); + __func__, sclk, baud_rate, tclkdiv, tfsdiv, rclkdiv); return 0; } @@ -231,7 +231,7 @@ static int sport_startup(struct uart_port *port) char buffer[20]; int retval; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); memset(buffer, 20, '\0'); snprintf(buffer, 20, "%s rx", up->name); retval = request_irq(up->rx_irq, sport_uart_rx_irq, IRQF_SAMPLE_RANDOM, buffer, up); @@ -320,7 +320,7 @@ static unsigned int sport_tx_empty(struct uart_port *port) unsigned int stat; stat = SPORT_GET_STAT(up); - pr_debug("%s stat:%04x\n", __FUNCTION__, stat); + pr_debug("%s stat:%04x\n", __func__, stat); if (stat & TXHRE) { return TIOCSER_TEMT; } else @@ -329,13 +329,13 @@ static unsigned int sport_tx_empty(struct uart_port *port) static unsigned int sport_get_mctrl(struct uart_port *port) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); return (TIOCM_CTS | TIOCM_CD | TIOCM_DSR); } static void sport_set_mctrl(struct uart_port *port, unsigned int mctrl) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); } static void sport_stop_tx(struct uart_port *port) @@ -343,7 +343,7 @@ static void sport_stop_tx(struct uart_port *port) struct sport_uart_port *up = (struct sport_uart_port *)port; unsigned int stat; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); stat = SPORT_GET_STAT(up); while(!(stat & TXHRE)) { @@ -366,21 +366,21 @@ static void sport_start_tx(struct uart_port *port) { struct sport_uart_port *up = (struct sport_uart_port *)port; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); /* Write data into SPORT FIFO before enable SPROT to transmit */ sport_uart_tx_chars(up); /* Enable transmit, then an interrupt will generated */ SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) | TSPEN)); SSYNC(); - pr_debug("%s exit\n", __FUNCTION__); + pr_debug("%s exit\n", __func__); } static void sport_stop_rx(struct uart_port *port) { struct sport_uart_port *up = (struct sport_uart_port *)port; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); /* Disable sport to stop rx */ SPORT_PUT_RCR1(up, (SPORT_GET_RCR1(up) & ~RSPEN)); SSYNC(); @@ -388,19 +388,19 @@ static void sport_stop_rx(struct uart_port *port) static void sport_enable_ms(struct uart_port *port) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); } static void sport_break_ctl(struct uart_port *port, int break_state) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); } static void sport_shutdown(struct uart_port *port) { struct sport_uart_port *up = (struct sport_uart_port *)port; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); /* Disable sport */ SPORT_PUT_TCR1(up, (SPORT_GET_TCR1(up) & ~TSPEN)); @@ -421,7 +421,7 @@ static void sport_shutdown(struct uart_port *port) static void sport_set_termios(struct uart_port *port, struct termios *termios, struct termios *old) { - pr_debug("%s enter, c_cflag:%08x\n", __FUNCTION__, termios->c_cflag); + pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag); uart_update_timeout(port, CS8 ,port->uartclk); } @@ -429,18 +429,18 @@ static const char *sport_type(struct uart_port *port) { struct sport_uart_port *up = (struct sport_uart_port *)port; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); return up->name; } static void sport_release_port(struct uart_port *port) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); } static int sport_request_port(struct uart_port *port) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); return 0; } @@ -448,13 +448,13 @@ static void sport_config_port(struct uart_port *port, int flags) { struct sport_uart_port *up = (struct sport_uart_port *)port; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); up->port.type = PORT_BFIN_SPORT; } static int sport_verify_port(struct uart_port *port, struct serial_struct *ser) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); return 0; } @@ -527,7 +527,7 @@ static int sport_uart_suspend(struct platform_device *dev, pm_message_t state) { struct sport_uart_port *sport = platform_get_drvdata(dev); - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); if (sport) uart_suspend_port(&sport_uart_reg, &sport->port); @@ -538,7 +538,7 @@ static int sport_uart_resume(struct platform_device *dev) { struct sport_uart_port *sport = platform_get_drvdata(dev); - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); if (sport) uart_resume_port(&sport_uart_reg, &sport->port); @@ -547,7 +547,7 @@ static int sport_uart_resume(struct platform_device *dev) static int sport_uart_probe(struct platform_device *dev) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); sport_uart_ports[dev->id].port.dev = &dev->dev; uart_add_one_port(&sport_uart_reg, &sport_uart_ports[dev->id].port); platform_set_drvdata(dev, &sport_uart_ports[dev->id]); @@ -559,7 +559,7 @@ static int sport_uart_remove(struct platform_device *dev) { struct sport_uart_port *sport = platform_get_drvdata(dev); - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); platform_set_drvdata(dev, NULL); if (sport) @@ -582,7 +582,7 @@ static int __init sport_uart_init(void) { int ret; - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); ret = uart_register_driver(&sport_uart_reg); if (ret != 0) { printk(KERN_ERR "Failed to register %s:%d\n", @@ -597,13 +597,13 @@ static int __init sport_uart_init(void) } - pr_debug("%s exit\n", __FUNCTION__); + pr_debug("%s exit\n", __func__); return ret; } static void __exit sport_uart_exit(void) { - pr_debug("%s enter\n", __FUNCTION__); + pr_debug("%s enter\n", __func__); platform_driver_unregister(&sport_uart_driver); uart_unregister_driver(&sport_uart_reg); } diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c index a697914ae3d0..3547558d2caf 100644 --- a/drivers/serial/jsm/jsm_tty.c +++ b/drivers/serial/jsm/jsm_tty.c @@ -272,7 +272,7 @@ static void jsm_tty_close(struct uart_port *port) jsm_printk(CLOSE, INFO, &channel->ch_bd->pci_dev, "start\n"); bd = channel->ch_bd; - ts = channel->uart_port.info->port.tty->termios; + ts = port->info->port.tty->termios; channel->ch_flags &= ~(CH_STOPI); diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 874786a11fe9..dc68b7e0c930 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -50,7 +50,7 @@ static struct lock_class_key port_lock_key; #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8) -#define uart_users(state) ((state)->count + ((state)->info ? (state)->info->port.blocked_open : 0)) +#define uart_users(state) ((state)->count + (state)->info.port.blocked_open) #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) ((port)->cons && (port)->cons->index == (port)->line) @@ -94,7 +94,7 @@ static void __uart_start(struct tty_struct *tty) struct uart_state *state = tty->driver_data; struct uart_port *port = state->port; - if (!uart_circ_empty(&state->info->xmit) && state->info->xmit.buf && + if (!uart_circ_empty(&state->info.xmit) && state->info.xmit.buf && !tty->stopped && !tty->hw_stopped) port->ops->start_tx(port); } @@ -113,7 +113,7 @@ static void uart_start(struct tty_struct *tty) static void uart_tasklet_action(unsigned long data) { struct uart_state *state = (struct uart_state *)data; - tty_wakeup(state->info->port.tty); + tty_wakeup(state->info.port.tty); } static inline void @@ -139,7 +139,7 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) */ static int uart_startup(struct uart_state *state, int init_hw) { - struct uart_info *info = state->info; + struct uart_info *info = &state->info; struct uart_port *port = state->port; unsigned long page; int retval = 0; @@ -212,14 +212,15 @@ static int uart_startup(struct uart_state *state, int init_hw) */ static void uart_shutdown(struct uart_state *state) { - struct uart_info *info = state->info; + struct uart_info *info = &state->info; struct uart_port *port = state->port; + struct tty_struct *tty = info->port.tty; /* * Set the TTY IO error marker */ - if (info->port.tty) - set_bit(TTY_IO_ERROR, &info->port.tty->flags); + if (tty) + set_bit(TTY_IO_ERROR, &tty->flags); if (info->flags & UIF_INITIALIZED) { info->flags &= ~UIF_INITIALIZED; @@ -227,7 +228,7 @@ static void uart_shutdown(struct uart_state *state) /* * Turn off DTR and RTS early. */ - if (!info->port.tty || (info->port.tty->termios->c_cflag & HUPCL)) + if (!tty || (tty->termios->c_cflag & HUPCL)) uart_clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); /* @@ -427,7 +428,7 @@ EXPORT_SYMBOL(uart_get_divisor); static void uart_change_speed(struct uart_state *state, struct ktermios *old_termios) { - struct tty_struct *tty = state->info->port.tty; + struct tty_struct *tty = state->info.port.tty; struct uart_port *port = state->port; struct ktermios *termios; @@ -444,14 +445,14 @@ uart_change_speed(struct uart_state *state, struct ktermios *old_termios) * Set flags based on termios cflag */ if (termios->c_cflag & CRTSCTS) - state->info->flags |= UIF_CTS_FLOW; + state->info.flags |= UIF_CTS_FLOW; else - state->info->flags &= ~UIF_CTS_FLOW; + state->info.flags &= ~UIF_CTS_FLOW; if (termios->c_cflag & CLOCAL) - state->info->flags &= ~UIF_CHECK_CD; + state->info.flags &= ~UIF_CHECK_CD; else - state->info->flags |= UIF_CHECK_CD; + state->info.flags |= UIF_CHECK_CD; port->ops->set_termios(port, termios, old_termios); } @@ -479,7 +480,7 @@ static int uart_put_char(struct tty_struct *tty, unsigned char ch) { struct uart_state *state = tty->driver_data; - return __uart_put_char(state->port, &state->info->xmit, ch); + return __uart_put_char(state->port, &state->info.xmit, ch); } static void uart_flush_chars(struct tty_struct *tty) @@ -500,13 +501,13 @@ uart_write(struct tty_struct *tty, const unsigned char *buf, int count) * This means you called this function _after_ the port was * closed. No cookie for you. */ - if (!state || !state->info) { + if (!state) { WARN_ON(1); return -EL3HLT; } port = state->port; - circ = &state->info->xmit; + circ = &state->info.xmit; if (!circ->buf) return 0; @@ -537,7 +538,7 @@ static int uart_write_room(struct tty_struct *tty) int ret; spin_lock_irqsave(&state->port->lock, flags); - ret = uart_circ_chars_free(&state->info->xmit); + ret = uart_circ_chars_free(&state->info.xmit); spin_unlock_irqrestore(&state->port->lock, flags); return ret; } @@ -549,7 +550,7 @@ static int uart_chars_in_buffer(struct tty_struct *tty) int ret; spin_lock_irqsave(&state->port->lock, flags); - ret = uart_circ_chars_pending(&state->info->xmit); + ret = uart_circ_chars_pending(&state->info.xmit); spin_unlock_irqrestore(&state->port->lock, flags); return ret; } @@ -564,7 +565,7 @@ static void uart_flush_buffer(struct tty_struct *tty) * This means you called this function _after_ the port was * closed. No cookie for you. */ - if (!state || !state->info) { + if (!state) { WARN_ON(1); return; } @@ -573,7 +574,7 @@ static void uart_flush_buffer(struct tty_struct *tty) pr_debug("uart_flush_buffer(%d) called\n", tty->index); spin_lock_irqsave(&port->lock, flags); - uart_circ_clear(&state->info->xmit); + uart_circ_clear(&state->info.xmit); if (port->ops->flush_buffer) port->ops->flush_buffer(port); spin_unlock_irqrestore(&port->lock, flags); @@ -837,15 +838,15 @@ static int uart_set_info(struct uart_state *state, state->closing_wait = closing_wait; if (new_serial.xmit_fifo_size) port->fifosize = new_serial.xmit_fifo_size; - if (state->info->port.tty) - state->info->port.tty->low_latency = + if (state->info.port.tty) + state->info.port.tty->low_latency = (port->flags & UPF_LOW_LATENCY) ? 1 : 0; check_and_exit: retval = 0; if (port->type == PORT_UNKNOWN) goto exit; - if (state->info->flags & UIF_INITIALIZED) { + if (state->info.flags & UIF_INITIALIZED) { if (((old_flags ^ port->flags) & UPF_SPD_MASK) || old_custom_divisor != port->custom_divisor) { /* @@ -858,7 +859,7 @@ static int uart_set_info(struct uart_state *state, printk(KERN_NOTICE "%s sets custom speed on %s. This " "is deprecated.\n", current->comm, - tty_name(state->info->port.tty, buf)); + tty_name(state->info.port.tty, buf)); } uart_change_speed(state, NULL); } @@ -889,8 +890,8 @@ static int uart_get_lsr_info(struct uart_state *state, * interrupt happens). */ if (port->x_char || - ((uart_circ_chars_pending(&state->info->xmit) > 0) && - !state->info->port.tty->stopped && !state->info->port.tty->hw_stopped)) + ((uart_circ_chars_pending(&state->info.xmit) > 0) && + !state->info.port.tty->stopped && !state->info.port.tty->hw_stopped)) result &= ~TIOCSER_TEMT; return put_user(result, value); @@ -1017,7 +1018,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg) port->ops->enable_ms(port); spin_unlock_irq(&port->lock); - add_wait_queue(&state->info->delta_msr_wait, &wait); + add_wait_queue(&state->info.delta_msr_wait, &wait); for (;;) { spin_lock_irq(&port->lock); memcpy(&cnow, &port->icount, sizeof(struct uart_icount)); @@ -1045,7 +1046,7 @@ uart_wait_modem_status(struct uart_state *state, unsigned long arg) } current->state = TASK_RUNNING; - remove_wait_queue(&state->info->delta_msr_wait, &wait); + remove_wait_queue(&state->info.delta_msr_wait, &wait); return ret; } @@ -1241,7 +1242,7 @@ static void uart_set_termios(struct tty_struct *tty, */ if (!(old_termios->c_cflag & CLOCAL) && (tty->termios->c_cflag & CLOCAL)) - wake_up_interruptible(&state->info->port.open_wait); + wake_up_interruptible(&info->port.open_wait); #endif } @@ -1303,7 +1304,7 @@ static void uart_close(struct tty_struct *tty, struct file *filp) * At this point, we stop accepting input. To do this, we * disable the receive line status interrupts. */ - if (state->info->flags & UIF_INITIALIZED) { + if (state->info.flags & UIF_INITIALIZED) { unsigned long flags; spin_lock_irqsave(&port->lock, flags); port->ops->stop_rx(port); @@ -1322,9 +1323,9 @@ static void uart_close(struct tty_struct *tty, struct file *filp) tty_ldisc_flush(tty); tty->closing = 0; - state->info->port.tty = NULL; + state->info.port.tty = NULL; - if (state->info->port.blocked_open) { + if (state->info.port.blocked_open) { if (state->close_delay) msleep_interruptible(state->close_delay); } else if (!uart_console(port)) { @@ -1334,8 +1335,8 @@ static void uart_close(struct tty_struct *tty, struct file *filp) /* * Wake up anyone trying to open this port. */ - state->info->flags &= ~UIF_NORMAL_ACTIVE; - wake_up_interruptible(&state->info->port.open_wait); + state->info.flags &= ~UIF_NORMAL_ACTIVE; + wake_up_interruptible(&state->info.port.open_wait); done: mutex_unlock(&state->mutex); @@ -1409,19 +1410,20 @@ static void uart_wait_until_sent(struct tty_struct *tty, int timeout) static void uart_hangup(struct tty_struct *tty) { struct uart_state *state = tty->driver_data; + struct uart_info *info = &state->info; BUG_ON(!kernel_locked()); pr_debug("uart_hangup(%d)\n", state->port->line); mutex_lock(&state->mutex); - if (state->info && state->info->flags & UIF_NORMAL_ACTIVE) { + if (info->flags & UIF_NORMAL_ACTIVE) { uart_flush_buffer(tty); uart_shutdown(state); state->count = 0; - state->info->flags &= ~UIF_NORMAL_ACTIVE; - state->info->port.tty = NULL; - wake_up_interruptible(&state->info->port.open_wait); - wake_up_interruptible(&state->info->delta_msr_wait); + info->flags &= ~UIF_NORMAL_ACTIVE; + info->port.tty = NULL; + wake_up_interruptible(&info->port.open_wait); + wake_up_interruptible(&info->delta_msr_wait); } mutex_unlock(&state->mutex); } @@ -1434,7 +1436,7 @@ static void uart_hangup(struct tty_struct *tty) */ static void uart_update_termios(struct uart_state *state) { - struct tty_struct *tty = state->info->port.tty; + struct tty_struct *tty = state->info.port.tty; struct uart_port *port = state->port; if (uart_console(port) && port->cons->cflag) { @@ -1469,7 +1471,7 @@ static int uart_block_til_ready(struct file *filp, struct uart_state *state) { DECLARE_WAITQUEUE(wait, current); - struct uart_info *info = state->info; + struct uart_info *info = &state->info; struct uart_port *port = state->port; unsigned int mctrl; @@ -1563,28 +1565,6 @@ static struct uart_state *uart_get(struct uart_driver *drv, int line) ret = -ENXIO; goto err_unlock; } - - /* BKL: RACE HERE - LEAK */ - /* We should move this into the uart_state structure and kill off - this whole complexity */ - if (!state->info) { - state->info = kzalloc(sizeof(struct uart_info), GFP_KERNEL); - if (state->info) { - init_waitqueue_head(&state->info->port.open_wait); - init_waitqueue_head(&state->info->delta_msr_wait); - - /* - * Link the info into the other structures. - */ - state->port->info = state->info; - - tasklet_init(&state->info->tlet, uart_tasklet_action, - (unsigned long)state); - } else { - ret = -ENOMEM; - goto err_unlock; - } - } return state; err_unlock: @@ -1641,9 +1621,10 @@ static int uart_open(struct tty_struct *tty, struct file *filp) * Any failures from here onwards should not touch the count. */ tty->driver_data = state; + state->port->info = &state->info; tty->low_latency = (state->port->flags & UPF_LOW_LATENCY) ? 1 : 0; tty->alt_speed = 0; - state->info->port.tty = tty; + state->info.port.tty = tty; /* * If the port is in the middle of closing, bail out now. @@ -1676,8 +1657,8 @@ static int uart_open(struct tty_struct *tty, struct file *filp) /* * If this is the first open to succeed, adjust things to suit. */ - if (retval == 0 && !(state->info->flags & UIF_NORMAL_ACTIVE)) { - state->info->flags |= UIF_NORMAL_ACTIVE; + if (retval == 0 && !(state->info.flags & UIF_NORMAL_ACTIVE)) { + state->info.flags |= UIF_NORMAL_ACTIVE; uart_update_termios(state); } @@ -2028,11 +2009,11 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port) } port->suspended = 1; - if (state->info && state->info->flags & UIF_INITIALIZED) { + if (state->info.flags & UIF_INITIALIZED) { const struct uart_ops *ops = port->ops; int tries; - state->info->flags = (state->info->flags & ~UIF_INITIALIZED) + state->info.flags = (state->info.flags & ~UIF_INITIALIZED) | UIF_SUSPENDED; spin_lock_irq(&port->lock); @@ -2107,15 +2088,15 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) /* * If that's unset, use the tty termios setting. */ - if (state->info && state->info->port.tty && termios.c_cflag == 0) - termios = *state->info->port.tty->termios; + if (state->info.port.tty && termios.c_cflag == 0) + termios = *state->info.port.tty->termios; uart_change_pm(state, 0); port->ops->set_termios(port, &termios, NULL); console_start(port->cons); } - if (state->info && state->info->flags & UIF_SUSPENDED) { + if (state->info.flags & UIF_SUSPENDED) { const struct uart_ops *ops = port->ops; int ret; @@ -2130,7 +2111,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) ops->set_mctrl(port, port->mctrl); ops->start_tx(port); spin_unlock_irq(&port->lock); - state->info->flags |= UIF_INITIALIZED; + state->info.flags |= UIF_INITIALIZED; } else { /* * Failed to resume - maybe hardware went away? @@ -2140,7 +2121,7 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port) uart_shutdown(state); } - state->info->flags &= ~UIF_SUSPENDED; + state->info.flags &= ~UIF_SUSPENDED; } mutex_unlock(&state->mutex); @@ -2198,11 +2179,14 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, * Now do the auto configuration stuff. Note that config_port * is expected to claim the resources and map the port for us. */ - flags = UART_CONFIG_TYPE; + flags = 0; if (port->flags & UPF_AUTO_IRQ) flags |= UART_CONFIG_IRQ; if (port->flags & UPF_BOOT_AUTOCONF) { - port->type = PORT_UNKNOWN; + if (!(port->flags & UPF_FIXED_TYPE)) { + port->type = PORT_UNKNOWN; + flags |= UART_CONFIG_TYPE; + } port->ops->config_port(port, flags); } @@ -2383,8 +2367,12 @@ int uart_register_driver(struct uart_driver *drv) state->close_delay = 500; /* .5 seconds */ state->closing_wait = 30000; /* 30 seconds */ - mutex_init(&state->mutex); + + tty_port_init(&state->info.port); + init_waitqueue_head(&state->info.delta_msr_wait); + tasklet_init(&state->info.tlet, uart_tasklet_action, + (unsigned long)state); } retval = tty_register_driver(normal); @@ -2455,7 +2443,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port) state->pm_state = -1; port->cons = drv->cons; - port->info = state->info; + port->info = &state->info; /* * If this port is a console, then the spinlock is already @@ -2527,18 +2515,11 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *port) */ tty_unregister_device(drv->tty_driver, port->line); - info = state->info; + info = &state->info; if (info && info->port.tty) tty_vhangup(info->port.tty); /* - * All users of this port should now be disconnected from - * this driver, and the port shut down. We should be the - * only thread fiddling with this port from now on. - */ - state->info = NULL; - - /* * Free the port IO and memory resources, if any. */ if (port->type != PORT_UNKNOWN) @@ -2552,10 +2533,8 @@ int uart_remove_one_port(struct uart_driver *drv, struct uart_port *port) /* * Kill the tasklet, and free resources. */ - if (info) { + if (info) tasklet_kill(&info->tlet); - kfree(info); - } state->port = NULL; mutex_unlock(&port_mutex); diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c index 64be4d88df11..8582236e4cad 100644 --- a/drivers/usb/host/hwa-hc.c +++ b/drivers/usb/host/hwa-hc.c @@ -54,7 +54,6 @@ * DWA). */ #include <linux/kernel.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/module.h> #include <linux/workqueue.h> @@ -63,16 +62,12 @@ #include "../wusbcore/wa-hc.h" #include "../wusbcore/wusbhc.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - struct hwahc { struct wusbhc wusbhc; /* has to be 1st */ struct wahc wa; - u8 buffer[16]; /* for misc usb transactions */ }; -/** +/* * FIXME should be wusbhc * * NOTE: we need to cache the Cluster ID because later...there is no @@ -126,7 +121,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd) struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); struct device *dev = &hwahc->wa.usb_iface->dev; - d_fnstart(4, dev, "(hwahc %p)\n", hwahc); mutex_lock(&wusbhc->mutex); wa_nep_disarm(&hwahc->wa); result = __wa_set_feature(&hwahc->wa, WA_RESET); @@ -134,7 +128,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd) dev_err(dev, "error commanding HC to reset: %d\n", result); goto error_unlock; } - d_printf(3, dev, "reset: waiting for device to change state\n"); result = __wa_wait_status(&hwahc->wa, WA_STATUS_RESETTING, 0); if (result < 0) { dev_err(dev, "error waiting for HC to reset: %d\n", result); @@ -142,7 +135,6 @@ static int hwahc_op_reset(struct usb_hcd *usb_hcd) } error_unlock: mutex_unlock(&wusbhc->mutex); - d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); return result; } @@ -155,15 +147,9 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd) int result; struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd); struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); - struct device *dev = &hwahc->wa.usb_iface->dev; - /* Set up a Host Info WUSB Information Element */ - d_fnstart(4, dev, "(hwahc %p)\n", hwahc); result = -ENOSPC; mutex_lock(&wusbhc->mutex); - /* Start the numbering from the top so that the bottom - * range of the unauth addr space is used for devices, - * the top for HCs; use 0xfe - RC# */ addr = wusb_cluster_id_get(); if (addr == 0) goto error_cluster_id_get; @@ -171,22 +157,14 @@ static int hwahc_op_start(struct usb_hcd *usb_hcd) if (result < 0) goto error_set_cluster_id; - result = wa_nep_arm(&hwahc->wa, GFP_KERNEL); - if (result < 0) { - dev_err(dev, "cannot listen to notifications: %d\n", result); - goto error_stop; - } usb_hcd->uses_new_polling = 1; usb_hcd->poll_rh = 1; usb_hcd->state = HC_STATE_RUNNING; result = 0; out: mutex_unlock(&wusbhc->mutex); - d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); return result; -error_stop: - __wa_stop(&hwahc->wa); error_set_cluster_id: wusb_cluster_id_put(wusbhc->cluster_id); error_cluster_id_get: @@ -194,39 +172,6 @@ error_cluster_id_get: } -/* - * FIXME: break this function up - */ -static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc) -{ - int result; - struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); - struct device *dev = &hwahc->wa.usb_iface->dev; - - /* Set up a Host Info WUSB Information Element */ - d_fnstart(4, dev, "(hwahc %p)\n", hwahc); - result = -ENOSPC; - - result = __wa_set_feature(&hwahc->wa, WA_ENABLE); - if (result < 0) { - dev_err(dev, "error commanding HC to start: %d\n", result); - goto error_stop; - } - result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE); - if (result < 0) { - dev_err(dev, "error waiting for HC to start: %d\n", result); - goto error_stop; - } - result = 0; -out: - d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); - return result; - -error_stop: - result = __wa_clear_feature(&hwahc->wa, WA_ENABLE); - goto out; -} - static int hwahc_op_suspend(struct usb_hcd *usb_hcd, pm_message_t msg) { struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd); @@ -246,18 +191,6 @@ static int hwahc_op_resume(struct usb_hcd *usb_hcd) return -ENOSYS; } -static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc) -{ - int result; - struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); - struct device *dev = &hwahc->wa.usb_iface->dev; - - d_fnstart(4, dev, "(hwahc %p)\n", hwahc); - /* Nothing for now */ - d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); - return; -} - /* * No need to abort pipes, as when this is called, all the children * has been disconnected and that has done it [through @@ -266,21 +199,11 @@ static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc) */ static void hwahc_op_stop(struct usb_hcd *usb_hcd) { - int result; struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd); - struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); - struct wahc *wa = &hwahc->wa; - struct device *dev = &wa->usb_iface->dev; - d_fnstart(4, dev, "(hwahc %p)\n", hwahc); mutex_lock(&wusbhc->mutex); - wusbhc_stop(wusbhc); - wa_nep_disarm(&hwahc->wa); - result = __wa_stop(&hwahc->wa); wusb_cluster_id_put(wusbhc->cluster_id); mutex_unlock(&wusbhc->mutex); - d_fnend(4, dev, "(hwahc %p) = %d\n", hwahc, result); - return; } static int hwahc_op_get_frame_number(struct usb_hcd *usb_hcd) @@ -325,6 +248,54 @@ static void hwahc_op_endpoint_disable(struct usb_hcd *usb_hcd, rpipe_ep_disable(&hwahc->wa, ep); } +static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc) +{ + int result; + struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); + struct device *dev = &hwahc->wa.usb_iface->dev; + + result = __wa_set_feature(&hwahc->wa, WA_ENABLE); + if (result < 0) { + dev_err(dev, "error commanding HC to start: %d\n", result); + goto error_stop; + } + result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE); + if (result < 0) { + dev_err(dev, "error waiting for HC to start: %d\n", result); + goto error_stop; + } + result = wa_nep_arm(&hwahc->wa, GFP_KERNEL); + if (result < 0) { + dev_err(dev, "cannot listen to notifications: %d\n", result); + goto error_stop; + } + return result; + +error_stop: + __wa_clear_feature(&hwahc->wa, WA_ENABLE); + return result; +} + +static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay) +{ + struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc); + struct wahc *wa = &hwahc->wa; + u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber; + int ret; + + ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0), + WUSB_REQ_CHAN_STOP, + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, + delay * 1000, + iface_no, + NULL, 0, 1000 /* FIXME: arbitrary */); + if (ret == 0) + msleep(delay); + + wa_nep_disarm(&hwahc->wa); + __wa_stop(&hwahc->wa); +} + /* * Set the UWB MAS allocation for the WUSB cluster * @@ -581,11 +552,11 @@ static int wa_fill_descr(struct wahc *wa) itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength); while (itr_size >= sizeof(*hdr)) { hdr = (struct usb_descriptor_header *) itr; - d_printf(3, dev, "Extra device descriptor: " - "type %02x/%u bytes @ %zu (%zu left)\n", - hdr->bDescriptorType, hdr->bLength, - (itr - usb_dev->rawdescriptors[actconfig_idx]), - itr_size); + dev_dbg(dev, "Extra device descriptor: " + "type %02x/%u bytes @ %zu (%zu left)\n", + hdr->bDescriptorType, hdr->bLength, + (itr - usb_dev->rawdescriptors[actconfig_idx]), + itr_size); if (hdr->bDescriptorType == USB_DT_WIRE_ADAPTER) goto found; itr += hdr->bLength; @@ -794,7 +765,6 @@ static void hwahc_destroy(struct hwahc *hwahc) { struct wusbhc *wusbhc = &hwahc->wusbhc; - d_fnstart(1, NULL, "(hwahc %p)\n", hwahc); mutex_lock(&wusbhc->mutex); __wa_destroy(&hwahc->wa); wusbhc_destroy(&hwahc->wusbhc); @@ -804,7 +774,6 @@ static void hwahc_destroy(struct hwahc *hwahc) usb_put_intf(hwahc->wa.usb_iface); usb_put_dev(hwahc->wa.usb_dev); mutex_unlock(&wusbhc->mutex); - d_fnend(1, NULL, "(hwahc %p) = void\n", hwahc); } static void hwahc_init(struct hwahc *hwahc) @@ -821,7 +790,6 @@ static int hwahc_probe(struct usb_interface *usb_iface, struct hwahc *hwahc; struct device *dev = &usb_iface->dev; - d_fnstart(4, dev, "(%p, %p)\n", usb_iface, id); result = -ENOMEM; usb_hcd = usb_create_hcd(&hwahc_hc_driver, &usb_iface->dev, "wusb-hwa"); if (usb_hcd == NULL) { @@ -848,7 +816,6 @@ static int hwahc_probe(struct usb_interface *usb_iface, dev_err(dev, "Cannot setup phase B of WUSBHC: %d\n", result); goto error_wusbhc_b_create; } - d_fnend(4, dev, "(%p, %p) = 0\n", usb_iface, id); return 0; error_wusbhc_b_create: @@ -858,7 +825,6 @@ error_add_hcd: error_hwahc_create: usb_put_hcd(usb_hcd); error_alloc: - d_fnend(4, dev, "(%p, %p) = %d\n", usb_iface, id, result); return result; } @@ -872,16 +838,12 @@ static void hwahc_disconnect(struct usb_interface *usb_iface) wusbhc = usb_hcd_to_wusbhc(usb_hcd); hwahc = container_of(wusbhc, struct hwahc, wusbhc); - d_fnstart(1, NULL, "(hwahc %p [usb_iface %p])\n", hwahc, usb_iface); wusbhc_b_destroy(&hwahc->wusbhc); usb_remove_hcd(usb_hcd); hwahc_destroy(hwahc); usb_put_hcd(usb_hcd); - d_fnend(1, NULL, "(hwahc %p [usb_iface %p]) = void\n", hwahc, - usb_iface); } -/** USB device ID's that we handle */ static struct usb_device_id hwahc_id_table[] = { /* FIXME: use class labels for this */ { USB_INTERFACE_INFO(0xe0, 0x02, 0x01), }, @@ -898,18 +860,7 @@ static struct usb_driver hwahc_driver = { static int __init hwahc_driver_init(void) { - int result; - result = usb_register(&hwahc_driver); - if (result < 0) { - printk(KERN_ERR "WA-CDS: Cannot register USB driver: %d\n", - result); - goto error_usb_register; - } - return 0; - -error_usb_register: - return result; - + return usb_register(&hwahc_driver); } module_init(hwahc_driver_init); diff --git a/drivers/usb/host/whci/Kbuild b/drivers/usb/host/whci/Kbuild index 26a3871ea0f9..11e5040b8337 100644 --- a/drivers/usb/host/whci/Kbuild +++ b/drivers/usb/host/whci/Kbuild @@ -2,6 +2,7 @@ obj-$(CONFIG_USB_WHCI_HCD) += whci-hcd.o whci-hcd-y := \ asl.o \ + debug.o \ hcd.o \ hw.o \ init.o \ diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c index 4d7078e50572..577c0d29849d 100644 --- a/drivers/usb/host/whci/asl.c +++ b/drivers/usb/host/whci/asl.c @@ -19,32 +19,11 @@ #include <linux/dma-mapping.h> #include <linux/uwb/umc.h> #include <linux/usb.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> #include "../../wusbcore/wusbhc.h" #include "whcd.h" -#if D_LOCAL >= 4 -static void dump_asl(struct whc *whc, const char *tag) -{ - struct device *dev = &whc->umc->dev; - struct whc_qset *qset; - - d_printf(4, dev, "ASL %s\n", tag); - - list_for_each_entry(qset, &whc->async_list, list_node) { - dump_qset(qset, dev); - } -} -#else -static inline void dump_asl(struct whc *whc, const char *tag) -{ -} -#endif - - static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset, struct whc_qset **next, struct whc_qset **prev) { @@ -179,11 +158,26 @@ void asl_stop(struct whc *whc) 1000, "stop ASL"); } +/** + * asl_update - request an ASL update and wait for the hardware to be synced + * @whc: the WHCI HC + * @wusbcmd: WUSBCMD value to start the update. + * + * If the WUSB HC is inactive (i.e., the ASL is stopped) then the + * update must be skipped as the hardware may not respond to update + * requests. + */ void asl_update(struct whc *whc, uint32_t wusbcmd) { - whc_write_wusbcmd(whc, wusbcmd, wusbcmd); - wait_event(whc->async_list_wq, - (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0); + struct wusbhc *wusbhc = &whc->wusbhc; + + mutex_lock(&wusbhc->mutex); + if (wusbhc->active) { + whc_write_wusbcmd(whc, wusbcmd, wusbcmd); + wait_event(whc->async_list_wq, + (le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0); + } + mutex_unlock(&wusbhc->mutex); } /** @@ -202,8 +196,6 @@ void scan_async_work(struct work_struct *work) spin_lock_irq(&whc->lock); - dump_asl(whc, "before processing"); - /* * Transerve the software list backwards so new qsets can be * safely inserted into the ASL without making it non-circular. @@ -217,8 +209,6 @@ void scan_async_work(struct work_struct *work) update |= process_qset(whc, qset); } - dump_asl(whc, "after processing"); - spin_unlock_irq(&whc->lock); if (update) { diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c new file mode 100644 index 000000000000..cf2d45946c57 --- /dev/null +++ b/drivers/usb/host/whci/debug.c @@ -0,0 +1,189 @@ +/* + * Wireless Host Controller (WHC) debug. + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include "../../wusbcore/wusbhc.h" + +#include "whcd.h" + +struct whc_dbg { + struct dentry *di_f; + struct dentry *asl_f; + struct dentry *pzl_f; +}; + +void qset_print(struct seq_file *s, struct whc_qset *qset) +{ + struct whc_std *std; + struct urb *urb = NULL; + int i; + + seq_printf(s, "qset %08x\n", (u32)qset->qset_dma); + seq_printf(s, " -> %08x\n", (u32)qset->qh.link); + seq_printf(s, " info: %08x %08x %08x\n", + qset->qh.info1, qset->qh.info2, qset->qh.info3); + seq_printf(s, " sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count); + seq_printf(s, " TD: sts: %08x opts: %08x\n", + qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options); + + for (i = 0; i < WHCI_QSET_TD_MAX; i++) { + seq_printf(s, " %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n", + i == qset->td_start ? 'S' : ' ', + i == qset->td_end ? 'E' : ' ', + i, qset->qtd[i].status, qset->qtd[i].options, + (u32)qset->qtd[i].page_list_ptr); + } + seq_printf(s, " ntds: %d\n", qset->ntds); + list_for_each_entry(std, &qset->stds, list_node) { + if (urb != std->urb) { + urb = std->urb; + seq_printf(s, " urb %p transferred: %d bytes\n", urb, + urb->actual_length); + } + if (std->qtd) + seq_printf(s, " sTD[%td]: %zu bytes @ %08x\n", + std->qtd - &qset->qtd[0], + std->len, std->num_pointers ? + (u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr); + else + seq_printf(s, " sTD[-]: %zd bytes @ %08x\n", + std->len, std->num_pointers ? + (u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr); + } +} + +static int di_print(struct seq_file *s, void *p) +{ + struct whc *whc = s->private; + char buf[72]; + int d; + + for (d = 0; d < whc->n_devices; d++) { + struct di_buf_entry *di = &whc->di_buf[d]; + + bitmap_scnprintf(buf, sizeof(buf), + (unsigned long *)di->availability_info, UWB_NUM_MAS); + + seq_printf(s, "DI[%d]\n", d); + seq_printf(s, " availability: %s\n", buf); + seq_printf(s, " %c%c key idx: %d dev addr: %d\n", + (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ', + (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ', + (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8, + (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK)); + } + return 0; +} + +static int asl_print(struct seq_file *s, void *p) +{ + struct whc *whc = s->private; + struct whc_qset *qset; + + list_for_each_entry(qset, &whc->async_list, list_node) { + qset_print(s, qset); + } + + return 0; +} + +static int pzl_print(struct seq_file *s, void *p) +{ + struct whc *whc = s->private; + struct whc_qset *qset; + int period; + + for (period = 0; period < 5; period++) { + seq_printf(s, "Period %d\n", period); + list_for_each_entry(qset, &whc->periodic_list[period], list_node) { + qset_print(s, qset); + } + } + return 0; +} + +static int di_open(struct inode *inode, struct file *file) +{ + return single_open(file, di_print, inode->i_private); +} + +static int asl_open(struct inode *inode, struct file *file) +{ + return single_open(file, asl_print, inode->i_private); +} + +static int pzl_open(struct inode *inode, struct file *file) +{ + return single_open(file, pzl_print, inode->i_private); +} + +static struct file_operations di_fops = { + .open = di_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static struct file_operations asl_fops = { + .open = asl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static struct file_operations pzl_fops = { + .open = pzl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +void whc_dbg_init(struct whc *whc) +{ + if (whc->wusbhc.pal.debugfs_dir == NULL) + return; + + whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL); + if (whc->dbg == NULL) + return; + + whc->dbg->di_f = debugfs_create_file("di", 0444, + whc->wusbhc.pal.debugfs_dir, whc, + &di_fops); + whc->dbg->asl_f = debugfs_create_file("asl", 0444, + whc->wusbhc.pal.debugfs_dir, whc, + &asl_fops); + whc->dbg->pzl_f = debugfs_create_file("pzl", 0444, + whc->wusbhc.pal.debugfs_dir, whc, + &pzl_fops); +} + +void whc_dbg_clean_up(struct whc *whc) +{ + if (whc->dbg) { + debugfs_remove(whc->dbg->pzl_f); + debugfs_remove(whc->dbg->asl_f); + debugfs_remove(whc->dbg->di_f); + kfree(whc->dbg); + } +} diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c index ef3ad4dca945..1569afd6245b 100644 --- a/drivers/usb/host/whci/hcd.c +++ b/drivers/usb/host/whci/hcd.c @@ -15,7 +15,6 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/uwb/umc.h> @@ -92,8 +91,6 @@ static void whc_stop(struct usb_hcd *usb_hcd) mutex_lock(&wusbhc->mutex); - wusbhc_stop(wusbhc); - /* stop HC */ le_writel(0, whc->base + WUSBINTR); whc_write_wusbcmd(whc, WUSBCMD_RUN, 0); @@ -276,6 +273,8 @@ static int whc_probe(struct umc_dev *umc) goto error_wusbhc_b_create; } + whc_dbg_init(whc); + return 0; error_wusbhc_b_create: @@ -299,6 +298,7 @@ static void whc_remove(struct umc_dev *umc) struct whc *whc = wusbhc_to_whc(wusbhc); if (usb_hcd) { + whc_dbg_clean_up(whc); wusbhc_b_destroy(wusbhc); usb_remove_hcd(usb_hcd); wusbhc_destroy(wusbhc); diff --git a/drivers/usb/host/whci/hw.c b/drivers/usb/host/whci/hw.c index ac86e59c1225..d498e7203217 100644 --- a/drivers/usb/host/whci/hw.c +++ b/drivers/usb/host/whci/hw.c @@ -50,6 +50,7 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len) unsigned long flags; dma_addr_t dma_addr; int t; + int ret = 0; mutex_lock(&whc->mutex); @@ -61,7 +62,8 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len) dev_err(&whc->umc->dev, "generic command timeout (%04x/%04x)\n", le_readl(whc->base + WUSBGENCMDSTS), le_readl(whc->base + WUSBGENCMDPARAMS)); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } if (addr) { @@ -80,8 +82,8 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len) whc->base + WUSBGENCMDSTS); spin_unlock_irqrestore(&whc->lock, flags); - +out: mutex_unlock(&whc->mutex); - return 0; + return ret; } diff --git a/drivers/usb/host/whci/int.c b/drivers/usb/host/whci/int.c index fce01174aa9b..6aae70028101 100644 --- a/drivers/usb/host/whci/int.c +++ b/drivers/usb/host/whci/int.c @@ -15,7 +15,6 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/uwb/umc.h> diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c index 8d62df0c330b..2ae5abf69a6a 100644 --- a/drivers/usb/host/whci/pzl.c +++ b/drivers/usb/host/whci/pzl.c @@ -19,35 +19,11 @@ #include <linux/dma-mapping.h> #include <linux/uwb/umc.h> #include <linux/usb.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> #include "../../wusbcore/wusbhc.h" #include "whcd.h" -#if D_LOCAL >= 4 -static void dump_pzl(struct whc *whc, const char *tag) -{ - struct device *dev = &whc->umc->dev; - struct whc_qset *qset; - int period = 0; - - d_printf(4, dev, "PZL %s\n", tag); - - for (period = 0; period < 5; period++) { - d_printf(4, dev, "Period %d\n", period); - list_for_each_entry(qset, &whc->periodic_list[period], list_node) { - dump_qset(qset, dev); - } - } -} -#else -static inline void dump_pzl(struct whc *whc, const char *tag) -{ -} -#endif - static void update_pzl_pointers(struct whc *whc, int period, u64 addr) { switch (period) { @@ -195,11 +171,26 @@ void pzl_stop(struct whc *whc) 1000, "stop PZL"); } +/** + * pzl_update - request a PZL update and wait for the hardware to be synced + * @whc: the WHCI HC + * @wusbcmd: WUSBCMD value to start the update. + * + * If the WUSB HC is inactive (i.e., the PZL is stopped) then the + * update must be skipped as the hardware may not respond to update + * requests. + */ void pzl_update(struct whc *whc, uint32_t wusbcmd) { - whc_write_wusbcmd(whc, wusbcmd, wusbcmd); - wait_event(whc->periodic_list_wq, - (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0); + struct wusbhc *wusbhc = &whc->wusbhc; + + mutex_lock(&wusbhc->mutex); + if (wusbhc->active) { + whc_write_wusbcmd(whc, wusbcmd, wusbcmd); + wait_event(whc->periodic_list_wq, + (le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0); + } + mutex_unlock(&wusbhc->mutex); } static void update_pzl_hw_view(struct whc *whc) @@ -235,8 +226,6 @@ void scan_periodic_work(struct work_struct *work) spin_lock_irq(&whc->lock); - dump_pzl(whc, "before processing"); - for (period = 4; period >= 0; period--) { list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) { if (!qset->in_hw_list) @@ -248,8 +237,6 @@ void scan_periodic_work(struct work_struct *work) if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED)) update_pzl_hw_view(whc); - dump_pzl(whc, "after processing"); - spin_unlock_irq(&whc->lock); if (update) { diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c index 0420037d2e18..7be74314ee12 100644 --- a/drivers/usb/host/whci/qset.c +++ b/drivers/usb/host/whci/qset.c @@ -24,46 +24,6 @@ #include "whcd.h" -void dump_qset(struct whc_qset *qset, struct device *dev) -{ - struct whc_std *std; - struct urb *urb = NULL; - int i; - - dev_dbg(dev, "qset %08x\n", (u32)qset->qset_dma); - dev_dbg(dev, " -> %08x\n", (u32)qset->qh.link); - dev_dbg(dev, " info: %08x %08x %08x\n", - qset->qh.info1, qset->qh.info2, qset->qh.info3); - dev_dbg(dev, " sts: %04x errs: %d\n", qset->qh.status, qset->qh.err_count); - dev_dbg(dev, " TD: sts: %08x opts: %08x\n", - qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options); - - for (i = 0; i < WHCI_QSET_TD_MAX; i++) { - dev_dbg(dev, " %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n", - i == qset->td_start ? 'S' : ' ', - i == qset->td_end ? 'E' : ' ', - i, qset->qtd[i].status, qset->qtd[i].options, - (u32)qset->qtd[i].page_list_ptr); - } - dev_dbg(dev, " ntds: %d\n", qset->ntds); - list_for_each_entry(std, &qset->stds, list_node) { - if (urb != std->urb) { - urb = std->urb; - dev_dbg(dev, " urb %p transferred: %d bytes\n", urb, - urb->actual_length); - } - if (std->qtd) - dev_dbg(dev, " sTD[%td]: %zu bytes @ %08x\n", - std->qtd - &qset->qtd[0], - std->len, std->num_pointers ? - (u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr); - else - dev_dbg(dev, " sTD[-]: %zd bytes @ %08x\n", - std->len, std->num_pointers ? - (u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr); - } -} - struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags) { struct whc_qset *qset; diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h index 1d2a53bd39fd..0f3540f04f53 100644 --- a/drivers/usb/host/whci/whcd.h +++ b/drivers/usb/host/whci/whcd.h @@ -21,6 +21,7 @@ #define __WHCD_H #include <linux/uwb/whci.h> +#include <linux/uwb/umc.h> #include <linux/workqueue.h> #include "whci-hc.h" @@ -28,6 +29,7 @@ /* Generic command timeout. */ #define WHC_GENCMD_TIMEOUT_MS 100 +struct whc_dbg; struct whc { struct wusbhc wusbhc; @@ -69,6 +71,8 @@ struct whc { struct list_head periodic_removed_list; wait_queue_head_t periodic_list_wq; struct work_struct periodic_work; + + struct whc_dbg *dbg; }; #define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc)) @@ -136,7 +140,7 @@ int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len); /* wusb.c */ int whc_wusbhc_start(struct wusbhc *wusbhc); -void whc_wusbhc_stop(struct wusbhc *wusbhc); +void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay); int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt, u8 handle, struct wuie_hdr *wuie); int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle); @@ -190,8 +194,11 @@ void process_inactive_qtd(struct whc *whc, struct whc_qset *qset, struct whc_qtd *qtd); enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset); void qset_remove_complete(struct whc *whc, struct whc_qset *qset); -void dump_qset(struct whc_qset *qset, struct device *dev); void pzl_update(struct whc *whc, uint32_t wusbcmd); void asl_update(struct whc *whc, uint32_t wusbcmd); +/* debug.c */ +void whc_dbg_init(struct whc *whc); +void whc_dbg_clean_up(struct whc *whc); + #endif /* #ifndef __WHCD_H */ diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h index bff1eb7a35cf..51df7e313b38 100644 --- a/drivers/usb/host/whci/whci-hc.h +++ b/drivers/usb/host/whci/whci-hc.h @@ -410,6 +410,8 @@ struct dn_buf_entry { # define WUSBDNTSCTRL_SLOTS(s) ((s) << 0) #define WUSBTIME 0x68 +# define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff + #define WUSBBPST 0x6c #define WUSBDIBUPDATED 0x70 diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c index 66e4ddcd961d..f24efdebad17 100644 --- a/drivers/usb/host/whci/wusb.c +++ b/drivers/usb/host/whci/wusb.c @@ -15,47 +15,19 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/uwb/umc.h> -#define D_LOCAL 1 -#include <linux/uwb/debug.h> #include "../../wusbcore/wusbhc.h" #include "whcd.h" -#if D_LOCAL >= 1 -static void dump_di(struct whc *whc, int idx) -{ - struct di_buf_entry *di = &whc->di_buf[idx]; - struct device *dev = &whc->umc->dev; - char buf[128]; - - bitmap_scnprintf(buf, sizeof(buf), (unsigned long *)di->availability_info, UWB_NUM_MAS); - - d_printf(1, dev, "DI[%d]\n", idx); - d_printf(1, dev, " availability: %s\n", buf); - d_printf(1, dev, " %c%c key idx: %d dev addr: %d\n", - (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ', - (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ', - (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8, - (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK)); -} -#else -static inline void dump_di(struct whc *whc, int idx) -{ -} -#endif - static int whc_update_di(struct whc *whc, int idx) { int offset = idx / 32; u32 bit = 1 << (idx % 32); - dump_di(whc, idx); - le_writel(bit, whc->base + WUSBDIBUPDATED + offset); return whci_wait_for(&whc->umc->dev, @@ -64,8 +36,9 @@ static int whc_update_di(struct whc *whc, int idx) } /* - * WHCI starts and stops MMCs based on there being a valid GTK so - * these need only start/stop the asynchronous and periodic schedules. + * WHCI starts MMCs based on there being a valid GTK so these need + * only start/stop the asynchronous and periodic schedules and send a + * channel stop command. */ int whc_wusbhc_start(struct wusbhc *wusbhc) @@ -78,12 +51,20 @@ int whc_wusbhc_start(struct wusbhc *wusbhc) return 0; } -void whc_wusbhc_stop(struct wusbhc *wusbhc) +void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay) { struct whc *whc = wusbhc_to_whc(wusbhc); + u32 stop_time, now_time; + int ret; pzl_stop(whc); asl_stop(whc); + + now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK; + stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff; + ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0); + if (ret == 0) + msleep(delay); } int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt, diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 5b95009d2fbb..19e24045b137 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -241,12 +241,25 @@ static void usb_console_write(struct console *co, } } +static struct tty_driver *usb_console_device(struct console *co, int *index) +{ + struct tty_driver **p = (struct tty_driver **)co->data; + + if (!*p) + return NULL; + + *index = co->index; + return *p; +} + static struct console usbcons = { .name = "ttyUSB", .write = usb_console_write, + .device = usb_console_device, .setup = usb_console_setup, .flags = CON_PRINTBUFFER, .index = -1, + .data = &usb_serial_tty_driver, }; void usb_serial_console_disconnect(struct usb_serial *serial) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index fb6f2933b01b..ef6cfa5a447f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1054,6 +1054,8 @@ static int set_serial_info(struct tty_struct *tty, if (copy_from_user(&new_serial, newinfo, sizeof(new_serial))) return -EFAULT; + + lock_kernel(); old_priv = *priv; /* Do error checking and permission checking */ @@ -1069,8 +1071,10 @@ static int set_serial_info(struct tty_struct *tty, } if ((new_serial.baud_base != priv->baud_base) && - (new_serial.baud_base < 9600)) + (new_serial.baud_base < 9600)) { + unlock_kernel(); return -EINVAL; + } /* Make the changes - these are privileged changes! */ @@ -1098,8 +1102,11 @@ check_and_exit: (priv->flags & ASYNC_SPD_MASK)) || (((priv->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST) && (old_priv.custom_divisor != priv->custom_divisor))) { + unlock_kernel(); change_speed(tty, port); } + else + unlock_kernel(); return 0; } /* set_serial_info */ diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index dc36a052766f..fcd9082f3e7f 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -878,6 +878,7 @@ static void mct_u232_break_ctl(struct tty_struct *tty, int break_state) dbg("%sstate=%d", __func__, break_state); + /* LOCKING */ if (break_state) lcr |= MCT_U232_SET_BREAK; diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 07710cf31d0d..82930a7d5093 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -721,10 +721,10 @@ static void mct_u232_break_ctl(struct tty_struct *tty, int break_state) spin_lock_irqsave(&priv->lock, flags); lcr = priv->last_lcr; - spin_unlock_irqrestore(&priv->lock, flags); if (break_state) lcr |= MCT_U232_SET_BREAK; + spin_unlock_irqrestore(&priv->lock, flags); mct_u232_set_line_ctrl(serial, lcr); } /* mct_u232_break_ctl */ diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index fda4a6421c44..96a8c7713212 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1343,6 +1343,7 @@ static void mos7840_break(struct tty_struct *tty, int break_state) else data = mos7840_port->shadowLCR & ~LCR_SET_BREAK; + /* FIXME: no locking on shadowLCR anywhere in driver */ mos7840_port->shadowLCR = data; dbg("mcs7840_break mos7840_port->shadowLCR is %x\n", mos7840_port->shadowLCR); @@ -2214,10 +2215,12 @@ static int mos7840_set_modem_info(struct moschip_port *mos7840_port, break; } + lock_kernel(); mos7840_port->shadowMCR = mcr; Data = mos7840_port->shadowMCR; status = mos7840_set_uart_reg(port, MODEM_CONTROL_REGISTER, Data); + unlock_kernel(); if (status < 0) { dbg("setting MODEM_CONTROL_REGISTER Failed\n"); return -1; diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 0f2b67244af6..d9bf9a5c20ec 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -442,7 +442,7 @@ static void sierra_indat_callback(struct urb *urb) " endpoint %02x.", __func__, status, endpoint); } else { if (urb->actual_length) { - tty = tty_port_tty_get(&port->port); + tty = tty_port_tty_get(&port->port); tty_buffer_request_room(tty, urb->actual_length); tty_insert_flip_string(tty, data, urb->actual_length); tty_flip_buffer_push(tty); diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 794b5ffe4397..080ade223d53 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -269,15 +269,19 @@ static void serial_close(struct tty_struct *tty, struct file *filp) return; } - --port->port.count; - if (port->port.count == 0) + if (port->port.count == 1) /* only call the device specific close if this - * port is being closed by the last owner */ + * port is being closed by the last owner. Ensure we do + * this before we drop the port count. The call is protected + * by the port mutex + */ port->serial->type->close(tty, port, filp); - if (port->port.count == (port->console? 1 : 0)) { + if (port->port.count == (port->console ? 2 : 1)) { struct tty_struct *tty = tty_port_tty_get(&port->port); if (tty) { + /* We must do this before we drop the port count to + zero. */ if (tty->driver_data) tty->driver_data = NULL; tty_port_tty_set(&port->port, NULL); @@ -285,13 +289,14 @@ static void serial_close(struct tty_struct *tty, struct file *filp) } } - if (port->port.count == 0) { + if (port->port.count == 1) { mutex_lock(&port->serial->disc_mutex); if (!port->serial->disconnected) usb_autopm_put_interface(port->serial->interface); mutex_unlock(&port->serial->disc_mutex); module_put(port->serial->type->driver.owner); } + --port->port.count; mutex_unlock(&port->mutex); usb_serial_put(port->serial); @@ -334,6 +339,10 @@ static int serial_chars_in_buffer(struct tty_struct *tty) dbg("%s = port %d", __func__, port->number); WARN_ON(!port->port.count); + /* if the device was unplugged then any remaining characters + fell out of the connector ;) */ + if (port->serial->disconnected) + return 0; /* pass on to the driver specific version of this function */ return port->serial->type->chars_in_buffer(tty); } @@ -373,9 +382,7 @@ static int serial_ioctl(struct tty_struct *tty, struct file *file, /* pass on to the driver specific version of this function if it is available */ if (port->serial->type->ioctl) { - lock_kernel(); retval = port->serial->type->ioctl(tty, file, cmd, arg); - unlock_kernel(); } else retval = -ENOIOCTLCMD; return retval; @@ -404,11 +411,8 @@ static int serial_break(struct tty_struct *tty, int break_state) WARN_ON(!port->port.count); /* pass on to the driver specific version of this function if it is available */ - if (port->serial->type->break_ctl) { - lock_kernel(); + if (port->serial->type->break_ctl) port->serial->type->break_ctl(tty, break_state); - unlock_kernel(); - } return 0; } diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c index ab4788d1785a..1335cbe1191d 100644 --- a/drivers/usb/wusbcore/cbaf.c +++ b/drivers/usb/wusbcore/cbaf.c @@ -88,7 +88,6 @@ */ #include <linux/module.h> #include <linux/ctype.h> -#include <linux/version.h> #include <linux/usb.h> #include <linux/interrupt.h> #include <linux/delay.h> diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index c36c4389baae..9ec7fd5da489 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -51,9 +51,17 @@ #include <linux/uwb.h> #include <linux/usb/wusb.h> #include <linux/scatterlist.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> +static int debug_crypto_verify = 0; + +module_param(debug_crypto_verify, int, 0); +MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms"); + +static void wusb_key_dump(const void *buf, size_t len) +{ + print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_OFFSET, 16, 1, + buf, len, 0); +} /* * Block of data, as understood by AES-CCM @@ -203,9 +211,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc, const u8 bzero[16] = { 0 }; size_t zero_padding; - d_fnstart(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, " - "n %p, a %p, b %p, blen %zu)\n", - tfm_cbc, tfm_aes, mic, n, a, b, blen); /* * These checks should be compile time optimized out * ensure @a fills b1's mac_header and following fields @@ -247,16 +252,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc, b1.la = cpu_to_be16(blen + 14); memcpy(&b1.mac_header, a, sizeof(*a)); - d_printf(4, NULL, "I: B0 (%zu bytes)\n", sizeof(b0)); - d_dump(4, NULL, &b0, sizeof(b0)); - d_printf(4, NULL, "I: B1 (%zu bytes)\n", sizeof(b1)); - d_dump(4, NULL, &b1, sizeof(b1)); - d_printf(4, NULL, "I: B (%zu bytes)\n", blen); - d_dump(4, NULL, b, blen); - d_printf(4, NULL, "I: B 0-padding (%zu bytes)\n", zero_padding); - d_printf(4, NULL, "D: IV before crypto (%zu)\n", ivsize); - d_dump(4, NULL, iv, ivsize); - sg_init_table(sg, ARRAY_SIZE(sg)); sg_set_buf(&sg[0], &b0, sizeof(b0)); sg_set_buf(&sg[1], &b1, sizeof(b1)); @@ -273,8 +268,6 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc, result); goto error_cbc_crypt; } - d_printf(4, NULL, "D: MIC tag\n"); - d_dump(4, NULL, iv, ivsize); /* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5] * The procedure is to AES crypt the A0 block and XOR the MIC @@ -289,17 +282,10 @@ static int wusb_ccm_mac(struct crypto_blkcipher *tfm_cbc, ax.counter = 0; crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax); bytewise_xor(mic, &ax, iv, 8); - d_printf(4, NULL, "D: CTR[MIC]\n"); - d_dump(4, NULL, &ax, 8); - d_printf(4, NULL, "D: CCM-MIC tag\n"); - d_dump(4, NULL, mic, 8); result = 8; error_cbc_crypt: kfree(dst_buf); error_dst_buf: - d_fnend(3, NULL, "(tfm_cbc %p, tfm_aes %p, mic %p, " - "n %p, a %p, b %p, blen %zu)\n", - tfm_cbc, tfm_aes, mic, n, a, b, blen); return result; } @@ -321,10 +307,6 @@ ssize_t wusb_prf(void *out, size_t out_size, u64 sfn = 0; __le64 sfn_le; - d_fnstart(3, NULL, "(out %p, out_size %zu, key %p, _n %p, " - "a %p, b %p, blen %zu, len %zu)\n", out, out_size, - key, _n, a, b, blen, len); - tfm_cbc = crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm_cbc)) { result = PTR_ERR(tfm_cbc); @@ -366,9 +348,6 @@ error_alloc_aes: error_setkey_cbc: crypto_free_blkcipher(tfm_cbc); error_alloc_cbc: - d_fnend(3, NULL, "(out %p, out_size %zu, key %p, _n %p, " - "a %p, b %p, blen %zu, len %zu) = %d\n", out, out_size, - key, _n, a, b, blen, len, (int)bytes); return result; } @@ -422,14 +401,14 @@ static int wusb_oob_mic_verify(void) "mismatch between MIC result and WUSB1.0[A2]\n"); hs_size = sizeof(stv_hsmic_hs) - sizeof(stv_hsmic_hs.MIC); printk(KERN_ERR "E: Handshake2 in: (%zu bytes)\n", hs_size); - dump_bytes(NULL, &stv_hsmic_hs, hs_size); + wusb_key_dump(&stv_hsmic_hs, hs_size); printk(KERN_ERR "E: CCM Nonce in: (%zu bytes)\n", sizeof(stv_hsmic_n)); - dump_bytes(NULL, &stv_hsmic_n, sizeof(stv_hsmic_n)); + wusb_key_dump(&stv_hsmic_n, sizeof(stv_hsmic_n)); printk(KERN_ERR "E: MIC out:\n"); - dump_bytes(NULL, mic, sizeof(mic)); + wusb_key_dump(mic, sizeof(mic)); printk(KERN_ERR "E: MIC out (from WUSB1.0[A.2]):\n"); - dump_bytes(NULL, stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC)); + wusb_key_dump(stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC)); result = -EINVAL; } else result = 0; @@ -497,19 +476,16 @@ static int wusb_key_derive_verify(void) printk(KERN_ERR "E: WUSB key derivation test: " "mismatch between key derivation result " "and WUSB1.0[A1] Errata 2006/12\n"); - printk(KERN_ERR "E: keydvt in: key (%zu bytes)\n", - sizeof(stv_key_a1)); - dump_bytes(NULL, stv_key_a1, sizeof(stv_key_a1)); - printk(KERN_ERR "E: keydvt in: nonce (%zu bytes)\n", - sizeof(stv_keydvt_n_a1)); - dump_bytes(NULL, &stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1)); - printk(KERN_ERR "E: keydvt in: hnonce & dnonce (%zu bytes)\n", - sizeof(stv_keydvt_in_a1)); - dump_bytes(NULL, &stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1)); + printk(KERN_ERR "E: keydvt in: key\n"); + wusb_key_dump(stv_key_a1, sizeof(stv_key_a1)); + printk(KERN_ERR "E: keydvt in: nonce\n"); + wusb_key_dump( &stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1)); + printk(KERN_ERR "E: keydvt in: hnonce & dnonce\n"); + wusb_key_dump(&stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1)); printk(KERN_ERR "E: keydvt out: KCK\n"); - dump_bytes(NULL, &keydvt_out.kck, sizeof(keydvt_out.kck)); + wusb_key_dump(&keydvt_out.kck, sizeof(keydvt_out.kck)); printk(KERN_ERR "E: keydvt out: PTK\n"); - dump_bytes(NULL, &keydvt_out.ptk, sizeof(keydvt_out.ptk)); + wusb_key_dump(&keydvt_out.ptk, sizeof(keydvt_out.ptk)); result = -EINVAL; } else result = 0; @@ -526,10 +502,13 @@ int wusb_crypto_init(void) { int result; - result = wusb_key_derive_verify(); - if (result < 0) - return result; - return wusb_oob_mic_verify(); + if (debug_crypto_verify) { + result = wusb_key_derive_verify(); + if (result < 0) + return result; + return wusb_oob_mic_verify(); + } + return 0; } void wusb_crypto_exit(void) diff --git a/drivers/usb/wusbcore/dev-sysfs.c b/drivers/usb/wusbcore/dev-sysfs.c index 7897a19652e5..101834576236 100644 --- a/drivers/usb/wusbcore/dev-sysfs.c +++ b/drivers/usb/wusbcore/dev-sysfs.c @@ -28,10 +28,6 @@ #include <linux/workqueue.h> #include "wusbhc.h" -#undef D_LOCAL -#define D_LOCAL 4 -#include <linux/uwb/debug.h> - static ssize_t wusb_disconnect_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c index f45d777bef34..e2e7e4bc8463 100644 --- a/drivers/usb/wusbcore/devconnect.c +++ b/drivers/usb/wusbcore/devconnect.c @@ -57,9 +57,6 @@ * Called by notif.c:wusb_handle_dn_connect() * when a DN_Connect is received. * - * wusbhc_devconnect_auth() Called by rh.c:wusbhc_rh_port_reset() when - * doing the device connect sequence. - * * wusb_devconnect_acked() Ack done, release resources. * * wusb_handle_dn_alive() Called by notif.c:wusb_handle_dn() @@ -69,9 +66,6 @@ * process a disconenct request from a * device. * - * wusb_dev_reset() Called by rh.c:wusbhc_rh_port_reset() when - * resetting a device. - * * __wusb_dev_disable() Called by rh.c:wusbhc_rh_clear_port_feat() when * disabling a port. * @@ -97,10 +91,6 @@ #include <linux/workqueue.h> #include "wusbhc.h" -#undef D_LOCAL -#define D_LOCAL 1 -#include <linux/uwb/debug.h> - static void wusbhc_devconnect_acked_work(struct work_struct *work); static void wusb_dev_free(struct wusb_dev *wusb_dev) @@ -240,6 +230,7 @@ static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc, list_add_tail(&wusb_dev->cack_node, &wusbhc->cack_list); wusbhc->cack_count++; wusbhc_fill_cack_ie(wusbhc); + return wusb_dev; } @@ -250,12 +241,9 @@ static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc, */ static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev) { - struct device *dev = wusbhc->dev; - d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev); list_del_init(&wusb_dev->cack_node); wusbhc->cack_count--; wusbhc_fill_cack_ie(wusbhc); - d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev); } /* @@ -263,14 +251,11 @@ static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev) static void wusbhc_devconnect_acked(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev) { - struct device *dev = wusbhc->dev; - d_fnstart(3, dev, "(wusbhc %p wusb_dev %p)\n", wusbhc, wusb_dev); wusbhc_cack_rm(wusbhc, wusb_dev); if (wusbhc->cack_count) wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr); else wusbhc_mmcie_rm(wusbhc, &wusbhc->cack_ie.hdr); - d_fnend(3, dev, "(wusbhc %p wusb_dev %p) = void\n", wusbhc, wusb_dev); } static void wusbhc_devconnect_acked_work(struct work_struct *work) @@ -320,7 +305,6 @@ void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc, struct wusb_port *port; unsigned idx, devnum; - d_fnstart(3, dev, "(%p, %p, %s)\n", wusbhc, dnc, pr_cdid); mutex_lock(&wusbhc->mutex); /* Check we are not handling it already */ @@ -366,16 +350,13 @@ void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc, port->wusb_dev = wusb_dev; port->status |= USB_PORT_STAT_CONNECTION; port->change |= USB_PORT_STAT_C_CONNECTION; - port->reset_count = 0; /* Now the port status changed to connected; khubd will * pick the change up and try to reset the port to bring it to * the enabled state--so this process returns up to the stack - * and it calls back into wusbhc_rh_port_reset() who will call - * devconnect_auth(). + * and it calls back into wusbhc_rh_port_reset(). */ error_unlock: mutex_unlock(&wusbhc->mutex); - d_fnend(3, dev, "(%p, %p, %s) = void\n", wusbhc, dnc, pr_cdid); return; } @@ -398,10 +379,8 @@ error_unlock: static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc, struct wusb_port *port) { - struct device *dev = wusbhc->dev; struct wusb_dev *wusb_dev = port->wusb_dev; - d_fnstart(3, dev, "(wusbhc %p, port %p)\n", wusbhc, port); port->status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE | USB_PORT_STAT_SUSPEND | USB_PORT_STAT_RESET | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); @@ -413,15 +392,11 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc, wusb_dev_put(wusb_dev); } port->wusb_dev = NULL; - /* don't reset the reset_count to zero or wusbhc_rh_port_reset will get - * confused! We only reset to zero when we connect a new device. - */ /* After a device disconnects, change the GTK (see [WUSB] * section 6.2.11.2). */ wusbhc_gtk_rekey(wusbhc); - d_fnend(3, dev, "(wusbhc %p, port %p) = void\n", wusbhc, port); /* The Wireless USB part has forgotten about the device already; now * khubd's timer will pick up the disconnection and remove the USB * device from the system @@ -429,39 +404,6 @@ static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc, } /* - * Authenticate a device into the WUSB Cluster - * - * Called from the Root Hub code (rh.c:wusbhc_rh_port_reset()) when - * asking for a reset on a port that is not enabled (ie: first connect - * on the port). - * - * Performs the 4way handshake to allow the device to comunicate w/ the - * WUSB Cluster securely; once done, issue a request to the device for - * it to change to address 0. - * - * This mimics the reset step of Wired USB that once resetting a - * device, leaves the port in enabled state and the dev with the - * default address (0). - * - * WUSB1.0[7.1.2] - * - * @port_idx: port where the change happened--This is the index into - * the wusbhc port array, not the USB port number. - */ -int wusbhc_devconnect_auth(struct wusbhc *wusbhc, u8 port_idx) -{ - struct device *dev = wusbhc->dev; - struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx); - - d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx); - port->status &= ~USB_PORT_STAT_RESET; - port->status |= USB_PORT_STAT_ENABLE; - port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE; - d_fnend(3, dev, "(%p, %u) = 0\n", wusbhc, port_idx); - return 0; -} - -/* * Refresh the list of keep alives to emit in the MMC * * Some devices don't respond to keep alives unless they've been @@ -528,21 +470,15 @@ static void __wusbhc_keep_alive(struct wusbhc *wusbhc) */ static void wusbhc_keep_alive_run(struct work_struct *ws) { - struct delayed_work *dw = - container_of(ws, struct delayed_work, work); - struct wusbhc *wusbhc = - container_of(dw, struct wusbhc, keep_alive_timer); - - d_fnstart(5, wusbhc->dev, "(wusbhc %p)\n", wusbhc); - if (wusbhc->active) { - mutex_lock(&wusbhc->mutex); - __wusbhc_keep_alive(wusbhc); - mutex_unlock(&wusbhc->mutex); - queue_delayed_work(wusbd, &wusbhc->keep_alive_timer, - (wusbhc->trust_timeout * CONFIG_HZ)/1000/2); - } - d_fnend(5, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc); - return; + struct delayed_work *dw = container_of(ws, struct delayed_work, work); + struct wusbhc *wusbhc = container_of(dw, struct wusbhc, keep_alive_timer); + + mutex_lock(&wusbhc->mutex); + __wusbhc_keep_alive(wusbhc); + mutex_unlock(&wusbhc->mutex); + + queue_delayed_work(wusbd, &wusbhc->keep_alive_timer, + msecs_to_jiffies(wusbhc->trust_timeout / 2)); } /* @@ -585,10 +521,6 @@ static struct wusb_dev *wusbhc_find_dev_by_addr(struct wusbhc *wusbhc, u8 addr) */ static void wusbhc_handle_dn_alive(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev) { - struct device *dev = wusbhc->dev; - - d_printf(2, dev, "DN ALIVE: device 0x%02x pong\n", wusb_dev->addr); - mutex_lock(&wusbhc->mutex); wusb_dev->entry_ts = jiffies; __wusbhc_keep_alive(wusbhc); @@ -621,11 +553,10 @@ static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc, "no-beacon" }; - d_fnstart(3, dev, "(%p, %p, %zu)\n", wusbhc, dn_hdr, size); if (size < sizeof(*dnc)) { dev_err(dev, "DN CONNECT: short notification (%zu < %zu)\n", size, sizeof(*dnc)); - goto out; + return; } dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr); @@ -637,10 +568,6 @@ static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc, wusb_dn_connect_new_connection(dnc) ? "connect" : "reconnect"); /* ACK the connect */ wusbhc_devconnect_ack(wusbhc, dnc, pr_cdid); -out: - d_fnend(3, dev, "(%p, %p, %zu) = void\n", - wusbhc, dn_hdr, size); - return; } /* @@ -662,60 +589,6 @@ static void wusbhc_handle_dn_disconnect(struct wusbhc *wusbhc, struct wusb_dev * } /* - * Reset a WUSB device on a HWA - * - * @wusbhc - * @port_idx Index of the port where the device is - * - * In Wireless USB, a reset is more or less equivalent to a full - * disconnect; so we just do a full disconnect and send the device a - * Device Reset IE (WUSB1.0[7.5.11]) giving it a few millisecs (6 MMCs). - * - * @wusbhc should be refcounted and unlocked - */ -int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port_idx) -{ - int result; - struct device *dev = wusbhc->dev; - struct wusb_dev *wusb_dev; - struct wuie_reset *ie; - - d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx); - mutex_lock(&wusbhc->mutex); - result = 0; - wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev; - if (wusb_dev == NULL) { - /* reset no device? ignore */ - dev_dbg(dev, "RESET: no device at port %u, ignoring\n", - port_idx); - goto error_unlock; - } - result = -ENOMEM; - ie = kzalloc(sizeof(*ie), GFP_KERNEL); - if (ie == NULL) - goto error_unlock; - ie->hdr.bLength = sizeof(ie->hdr) + sizeof(ie->CDID); - ie->hdr.bIEIdentifier = WUIE_ID_RESET_DEVICE; - ie->CDID = wusb_dev->cdid; - result = wusbhc_mmcie_set(wusbhc, 0xff, 6, &ie->hdr); - if (result < 0) { - dev_err(dev, "RESET: cant's set MMC: %d\n", result); - goto error_kfree; - } - __wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx)); - - /* 120ms, hopefully 6 MMCs (FIXME) */ - msleep(120); - wusbhc_mmcie_rm(wusbhc, &ie->hdr); -error_kfree: - kfree(ie); -error_unlock: - mutex_unlock(&wusbhc->mutex); - d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result); - return result; -} - -/* * Handle a Device Notification coming a host * * The Device Notification comes from a host (HWA, DWA or WHCI) @@ -735,19 +608,17 @@ void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr, struct device *dev = wusbhc->dev; struct wusb_dev *wusb_dev; - d_fnstart(3, dev, "(%p, %p)\n", wusbhc, dn_hdr); - if (size < sizeof(struct wusb_dn_hdr)) { dev_err(dev, "DN data shorter than DN header (%d < %d)\n", (int)size, (int)sizeof(struct wusb_dn_hdr)); - goto out; + return; } wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr); if (wusb_dev == NULL && dn_hdr->bType != WUSB_DN_CONNECT) { dev_dbg(dev, "ignoring DN %d from unconnected device %02x\n", dn_hdr->bType, srcaddr); - goto out; + return; } switch (dn_hdr->bType) { @@ -772,9 +643,6 @@ void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr, dev_warn(dev, "unknown DN %u (%d octets) from %u\n", dn_hdr->bType, (int)size, srcaddr); } -out: - d_fnend(3, dev, "(%p, %p) = void\n", wusbhc, dn_hdr); - return; } EXPORT_SYMBOL_GPL(wusbhc_handle_dn); @@ -804,59 +672,30 @@ void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port_idx) struct wusb_dev *wusb_dev; struct wuie_disconnect *ie; - d_fnstart(3, dev, "(%p, %u)\n", wusbhc, port_idx); - result = 0; wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev; if (wusb_dev == NULL) { /* reset no device? ignore */ dev_dbg(dev, "DISCONNECT: no device at port %u, ignoring\n", port_idx); - goto error; + return; } __wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx)); - result = -ENOMEM; ie = kzalloc(sizeof(*ie), GFP_KERNEL); if (ie == NULL) - goto error; + return; ie->hdr.bLength = sizeof(*ie); ie->hdr.bIEIdentifier = WUIE_ID_DEVICE_DISCONNECT; ie->bDeviceAddress = wusb_dev->addr; result = wusbhc_mmcie_set(wusbhc, 0, 0, &ie->hdr); - if (result < 0) { + if (result < 0) dev_err(dev, "DISCONNECT: can't set MMC: %d\n", result); - goto error_kfree; + else { + /* At least 6 MMCs, assuming at least 1 MMC per zone. */ + msleep(7*4); + wusbhc_mmcie_rm(wusbhc, &ie->hdr); } - - /* 120ms, hopefully 6 MMCs */ - msleep(100); - wusbhc_mmcie_rm(wusbhc, &ie->hdr); -error_kfree: kfree(ie); -error: - d_fnend(3, dev, "(%p, %u) = %d\n", wusbhc, port_idx, result); - return; -} - -static void wusb_cap_descr_printf(const unsigned level, struct device *dev, - const struct usb_wireless_cap_descriptor *wcd) -{ - d_printf(level, dev, - "WUSB Capability Descriptor\n" - " bDevCapabilityType 0x%02x\n" - " bmAttributes 0x%02x\n" - " wPhyRates 0x%04x\n" - " bmTFITXPowerInfo 0x%02x\n" - " bmFFITXPowerInfo 0x%02x\n" - " bmBandGroup 0x%04x\n" - " bReserved 0x%02x\n", - wcd->bDevCapabilityType, - wcd->bmAttributes, - le16_to_cpu(wcd->wPHYRates), - wcd->bmTFITXPowerInfo, - wcd->bmFFITXPowerInfo, - wcd->bmBandGroup, - wcd->bReserved); } /* @@ -899,8 +738,6 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev, } cap_size = cap_hdr->bLength; cap_type = cap_hdr->bDevCapabilityType; - d_printf(4, dev, "BOS Capability: 0x%02x (%zu bytes)\n", - cap_type, cap_size); if (cap_size == 0) break; if (cap_size > top - itr) { @@ -912,7 +749,6 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev, result = -EBADF; goto error_bad_cap; } - d_dump(3, dev, itr, cap_size); switch (cap_type) { case USB_CAP_TYPE_WIRELESS_USB: if (cap_size != sizeof(*wusb_dev->wusb_cap_descr)) @@ -920,10 +756,8 @@ static int wusb_dev_bos_grok(struct usb_device *usb_dev, "descriptor is %zu bytes vs %zu " "needed\n", cap_size, sizeof(*wusb_dev->wusb_cap_descr)); - else { + else wusb_dev->wusb_cap_descr = itr; - wusb_cap_descr_printf(3, dev, itr); - } break; default: dev_err(dev, "BUG? Unknown BOS capability 0x%02x " @@ -988,9 +822,7 @@ static int wusb_dev_bos_add(struct usb_device *usb_dev, "%zu bytes): %zd\n", desc_size, result); goto error_get_descriptor; } - d_printf(2, dev, "Got BOS descriptor %zd bytes, %u capabilities\n", - result, bos->bNumDeviceCaps); - d_dump(2, dev, bos, result); + result = wusb_dev_bos_grok(usb_dev, wusb_dev, bos, result); if (result < 0) goto error_bad_bos; @@ -1056,8 +888,6 @@ static void wusb_dev_add_ncb(struct usb_device *usb_dev) if (usb_dev->wusb == 0 || usb_dev->devnum == 1) return; /* skip non wusb and wusb RHs */ - d_fnstart(3, dev, "(usb_dev %p)\n", usb_dev); - wusbhc = wusbhc_get_by_usb_dev(usb_dev); if (wusbhc == NULL) goto error_nodev; @@ -1087,7 +917,6 @@ out: wusb_dev_put(wusb_dev); wusbhc_put(wusbhc); error_nodev: - d_fnend(3, dev, "(usb_dev %p) = void\n", usb_dev); return; wusb_dev_sysfs_rm(wusb_dev); @@ -1174,11 +1003,10 @@ EXPORT_SYMBOL_GPL(__wusb_dev_get_by_usb_dev); void wusb_dev_destroy(struct kref *_wusb_dev) { - struct wusb_dev *wusb_dev - = container_of(_wusb_dev, struct wusb_dev, refcnt); + struct wusb_dev *wusb_dev = container_of(_wusb_dev, struct wusb_dev, refcnt); + list_del_init(&wusb_dev->cack_node); wusb_dev_free(wusb_dev); - d_fnend(1, NULL, "%s (wusb_dev %p) = void\n", __func__, wusb_dev); } EXPORT_SYMBOL_GPL(wusb_dev_destroy); @@ -1190,8 +1018,6 @@ EXPORT_SYMBOL_GPL(wusb_dev_destroy); */ int wusbhc_devconnect_create(struct wusbhc *wusbhc) { - d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc); - wusbhc->keep_alive_ie.hdr.bIEIdentifier = WUIE_ID_KEEP_ALIVE; wusbhc->keep_alive_ie.hdr.bLength = sizeof(wusbhc->keep_alive_ie.hdr); INIT_DELAYED_WORK(&wusbhc->keep_alive_timer, wusbhc_keep_alive_run); @@ -1200,7 +1026,6 @@ int wusbhc_devconnect_create(struct wusbhc *wusbhc) wusbhc->cack_ie.hdr.bLength = sizeof(wusbhc->cack_ie.hdr); INIT_LIST_HEAD(&wusbhc->cack_list); - d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc); return 0; } @@ -1209,8 +1034,7 @@ int wusbhc_devconnect_create(struct wusbhc *wusbhc) */ void wusbhc_devconnect_destroy(struct wusbhc *wusbhc) { - d_fnstart(3, wusbhc->dev, "(wusbhc %p)\n", wusbhc); - d_fnend(3, wusbhc->dev, "(wusbhc %p) = void\n", wusbhc); + /* no op */ } /* @@ -1222,8 +1046,7 @@ void wusbhc_devconnect_destroy(struct wusbhc *wusbhc) * FIXME: This also enables the keep alives but this is not necessary * until there are connected and authenticated devices. */ -int wusbhc_devconnect_start(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid) +int wusbhc_devconnect_start(struct wusbhc *wusbhc) { struct device *dev = wusbhc->dev; struct wuie_host_info *hi; @@ -1236,7 +1059,7 @@ int wusbhc_devconnect_start(struct wusbhc *wusbhc, hi->hdr.bLength = sizeof(*hi); hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO; hi->attributes = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL); - hi->CHID = *chid; + hi->CHID = wusbhc->chid; result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr); if (result < 0) { dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result); diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c index cfa77a01cebd..3b52161e6e9c 100644 --- a/drivers/usb/wusbcore/mmc.c +++ b/drivers/usb/wusbcore/mmc.c @@ -159,15 +159,35 @@ found: } EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm); +static int wusbhc_mmc_start(struct wusbhc *wusbhc) +{ + int ret; + + mutex_lock(&wusbhc->mutex); + ret = wusbhc->start(wusbhc); + if (ret >= 0) + wusbhc->active = 1; + mutex_unlock(&wusbhc->mutex); + + return ret; +} + +static void wusbhc_mmc_stop(struct wusbhc *wusbhc) +{ + mutex_lock(&wusbhc->mutex); + wusbhc->active = 0; + wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS); + mutex_unlock(&wusbhc->mutex); +} + /* * wusbhc_start - start transmitting MMCs and accepting connections * @wusbhc: the HC to start - * @chid: the CHID to use for this host * * Establishes a cluster reservation, enables device connections, and * starts MMCs with appropriate DNTS parameters. */ -int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) +int wusbhc_start(struct wusbhc *wusbhc) { int result; struct device *dev = wusbhc->dev; @@ -181,7 +201,7 @@ int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) goto error_rsv_establish; } - result = wusbhc_devconnect_start(wusbhc, chid); + result = wusbhc_devconnect_start(wusbhc); if (result < 0) { dev_err(dev, "error enabling device connections: %d\n", result); goto error_devconnect_start; @@ -199,12 +219,12 @@ int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) dev_err(dev, "Cannot set DNTS parameters: %d\n", result); goto error_set_num_dnts; } - result = wusbhc->start(wusbhc); + result = wusbhc_mmc_start(wusbhc); if (result < 0) { dev_err(dev, "error starting wusbch: %d\n", result); goto error_wusbhc_start; } - wusbhc->active = 1; + return 0; error_wusbhc_start: @@ -219,76 +239,17 @@ error_rsv_establish: } /* - * Disconnect all from the WUSB Channel - * - * Send a Host Disconnect IE in the MMC, wait, don't send it any more - */ -static int __wusbhc_host_disconnect_ie(struct wusbhc *wusbhc) -{ - int result = -ENOMEM; - struct wuie_host_disconnect *host_disconnect_ie; - might_sleep(); - host_disconnect_ie = kmalloc(sizeof(*host_disconnect_ie), GFP_KERNEL); - if (host_disconnect_ie == NULL) - goto error_alloc; - host_disconnect_ie->hdr.bLength = sizeof(*host_disconnect_ie); - host_disconnect_ie->hdr.bIEIdentifier = WUIE_ID_HOST_DISCONNECT; - result = wusbhc_mmcie_set(wusbhc, 0, 0, &host_disconnect_ie->hdr); - if (result < 0) - goto error_mmcie_set; - - /* WUSB1.0[8.5.3.1 & 7.5.2] */ - msleep(100); - wusbhc_mmcie_rm(wusbhc, &host_disconnect_ie->hdr); -error_mmcie_set: - kfree(host_disconnect_ie); -error_alloc: - return result; -} - -/* * wusbhc_stop - stop transmitting MMCs * @wusbhc: the HC to stop * - * Send a Host Disconnect IE, wait, remove all the MMCs (stop sending MMCs). - * - * If we can't allocate a Host Stop IE, screw it, we don't notify the - * devices we are disconnecting... + * Stops the WUSB channel and removes the cluster reservation. */ void wusbhc_stop(struct wusbhc *wusbhc) { - if (wusbhc->active) { - wusbhc->active = 0; - wusbhc->stop(wusbhc); - wusbhc_sec_stop(wusbhc); - __wusbhc_host_disconnect_ie(wusbhc); - wusbhc_devconnect_stop(wusbhc); - wusbhc_rsv_terminate(wusbhc); - } -} -EXPORT_SYMBOL_GPL(wusbhc_stop); - -/* - * Change the CHID in a WUSB Channel - * - * If it is just a new CHID, send a Host Disconnect IE and then change - * the CHID IE. - */ -static int __wusbhc_chid_change(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid) -{ - int result = -ENOSYS; - struct device *dev = wusbhc->dev; - dev_err(dev, "%s() not implemented yet\n", __func__); - return result; - - BUG_ON(wusbhc->wuie_host_info == NULL); - __wusbhc_host_disconnect_ie(wusbhc); - wusbhc->wuie_host_info->CHID = *chid; - result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->wuie_host_info->hdr); - if (result < 0) - dev_err(dev, "Can't update Host Info WUSB IE: %d\n", result); - return result; + wusbhc_mmc_stop(wusbhc); + wusbhc_sec_stop(wusbhc); + wusbhc_devconnect_stop(wusbhc); + wusbhc_rsv_terminate(wusbhc); } /* @@ -306,16 +267,19 @@ int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid) chid = NULL; mutex_lock(&wusbhc->mutex); - if (wusbhc->active) { - if (chid) - result = __wusbhc_chid_change(wusbhc, chid); - else - wusbhc_stop(wusbhc); - } else { - if (chid) - wusbhc_start(wusbhc, chid); + if (chid) { + if (wusbhc->active) { + mutex_unlock(&wusbhc->mutex); + return -EBUSY; + } + wusbhc->chid = *chid; } mutex_unlock(&wusbhc->mutex); + + if (chid) + result = uwb_radio_start(&wusbhc->pal); + else + uwb_radio_stop(&wusbhc->pal); return result; } EXPORT_SYMBOL_GPL(wusbhc_chid_set); diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c index 7cc51e9905cf..d0b172c5ecc7 100644 --- a/drivers/usb/wusbcore/pal.c +++ b/drivers/usb/wusbcore/pal.c @@ -18,6 +18,16 @@ */ #include "wusbhc.h" +static void wusbhc_channel_changed(struct uwb_pal *pal, int channel) +{ + struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal); + + if (channel < 0) + wusbhc_stop(wusbhc); + else + wusbhc_start(wusbhc); +} + /** * wusbhc_pal_register - register the WUSB HC as a UWB PAL * @wusbhc: the WUSB HC @@ -28,8 +38,10 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) wusbhc->pal.name = "wusbhc"; wusbhc->pal.device = wusbhc->usb_hcd.self.controller; + wusbhc->pal.rc = wusbhc->uwb_rc; + wusbhc->pal.channel_changed = wusbhc_channel_changed; - return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal); + return uwb_pal_register(&wusbhc->pal); } /** @@ -38,5 +50,5 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) */ void wusbhc_pal_unregister(struct wusbhc *wusbhc) { - uwb_pal_unregister(wusbhc->uwb_rc, &wusbhc->pal); + uwb_pal_unregister(&wusbhc->pal); } diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c index fc63e77ded2d..4ed97360c046 100644 --- a/drivers/usb/wusbcore/reservation.c +++ b/drivers/usb/wusbcore/reservation.c @@ -48,18 +48,19 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv) { struct wusbhc *wusbhc = rsv->pal_priv; struct device *dev = wusbhc->dev; + struct uwb_mas_bm mas; char buf[72]; switch (rsv->state) { case UWB_RSV_STATE_O_ESTABLISHED: - bitmap_scnprintf(buf, sizeof(buf), rsv->mas.bm, UWB_NUM_MAS); + uwb_rsv_get_usable_mas(rsv, &mas); + bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS); dev_dbg(dev, "established reservation: %s\n", buf); - wusbhc_bwa_set(wusbhc, rsv->stream, &rsv->mas); + wusbhc_bwa_set(wusbhc, rsv->stream, &mas); break; case UWB_RSV_STATE_NONE: dev_dbg(dev, "removed reservation\n"); wusbhc_bwa_set(wusbhc, 0, NULL); - wusbhc->rsv = NULL; break; default: dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state); @@ -86,13 +87,12 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc) bcid.data[0] = wusbhc->cluster_id; bcid.data[1] = 0; - rsv->owner = &rc->uwb_dev; rsv->target.type = UWB_RSV_TARGET_DEVADDR; rsv->target.devaddr = bcid; rsv->type = UWB_DRP_TYPE_PRIVATE; - rsv->max_mas = 256; - rsv->min_mas = 16; /* one MAS per zone? */ - rsv->sparsity = 16; /* at least one MAS in each zone? */ + rsv->max_mas = 256; /* try to get as much as possible */ + rsv->min_mas = 15; /* one MAS per zone */ + rsv->max_interval = 1; /* max latency is one zone */ rsv->is_multicast = true; ret = uwb_rsv_establish(rsv); @@ -105,11 +105,14 @@ int wusbhc_rsv_establish(struct wusbhc *wusbhc) /** - * wusbhc_rsv_terminate - terminate any cluster reservation + * wusbhc_rsv_terminate - terminate the cluster reservation * @wusbhc: the WUSB host whose reservation is to be terminated */ void wusbhc_rsv_terminate(struct wusbhc *wusbhc) { - if (wusbhc->rsv) + if (wusbhc->rsv) { uwb_rsv_terminate(wusbhc->rsv); + uwb_rsv_destroy(wusbhc->rsv); + wusbhc->rsv = NULL; + } } diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c index 267a64325106..95c6fa3bf6b2 100644 --- a/drivers/usb/wusbcore/rh.c +++ b/drivers/usb/wusbcore/rh.c @@ -71,19 +71,20 @@ */ #include "wusbhc.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - /* * Reset a fake port * - * This can be called to reset a port from any other state or to reset - * it when connecting. In Wireless USB they are different; when doing - * a new connect that involves going over the authentication. When - * just reseting, its a different story. + * Using a Reset Device IE is too heavyweight as it causes the device + * to enter the UnConnected state and leave the cluster, this can mean + * that when the device reconnects it is connected to a different fake + * port. + * + * Instead, reset authenticated devices with a SetAddress(0), followed + * by a SetAddresss(AuthAddr). * - * The Linux USB stack resets a port twice before it considers it - * enabled, so we have to detect and ignore that. + * For unauthenticated devices just pretend to reset but do nothing. + * If the device initialization continues to fail it will eventually + * time out after TrustTimeout and enter the UnConnected state. * * @wusbhc is assumed referenced and @wusbhc->mutex unlocked. * @@ -97,20 +98,20 @@ static int wusbhc_rh_port_reset(struct wusbhc *wusbhc, u8 port_idx) { int result = 0; struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx); + struct wusb_dev *wusb_dev = port->wusb_dev; - d_fnstart(3, wusbhc->dev, "(wusbhc %p port_idx %u)\n", - wusbhc, port_idx); - if (port->reset_count == 0) { - wusbhc_devconnect_auth(wusbhc, port_idx); - port->reset_count++; - } else if (port->reset_count == 1) - /* see header */ - d_printf(2, wusbhc->dev, "Ignoring second reset on port_idx " - "%u\n", port_idx); + port->status |= USB_PORT_STAT_RESET; + port->change |= USB_PORT_STAT_C_RESET; + + if (wusb_dev->addr & WUSB_DEV_ADDR_UNAUTH) + result = 0; else - result = wusbhc_dev_reset(wusbhc, port_idx); - d_fnend(3, wusbhc->dev, "(wusbhc %p port_idx %u) = %d\n", - wusbhc, port_idx, result); + result = wusb_dev_update_address(wusbhc, wusb_dev); + + port->status &= ~USB_PORT_STAT_RESET; + port->status |= USB_PORT_STAT_ENABLE; + port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE; + return result; } @@ -138,7 +139,6 @@ int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf) size_t cnt, size; unsigned long *buf = (unsigned long *) _buf; - d_fnstart(1, wusbhc->dev, "(wusbhc %p)\n", wusbhc); /* WE DON'T LOCK, see comment */ size = wusbhc->ports_max + 1 /* hub bit */; size = (size + 8 - 1) / 8; /* round to bytes */ @@ -147,8 +147,6 @@ int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf) set_bit(cnt + 1, buf); else clear_bit(cnt + 1, buf); - d_fnend(1, wusbhc->dev, "(wusbhc %p) %u, buffer:\n", wusbhc, (int)size); - d_dump(1, wusbhc->dev, _buf, size); return size; } EXPORT_SYMBOL_GPL(wusbhc_rh_status_data); @@ -197,9 +195,7 @@ static int wusbhc_rh_get_hub_descr(struct wusbhc *wusbhc, u16 wValue, static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature) { int result; - struct device *dev = wusbhc->dev; - d_fnstart(4, dev, "(%p, feature 0x%04u)\n", wusbhc, feature); switch (feature) { case C_HUB_LOCAL_POWER: /* FIXME: maybe plug bit 0 to the power input status, @@ -211,7 +207,6 @@ static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature) default: result = -EPIPE; } - d_fnend(4, dev, "(%p, feature 0x%04u), %d\n", wusbhc, feature, result); return result; } @@ -238,14 +233,10 @@ static int wusbhc_rh_get_hub_status(struct wusbhc *wusbhc, u32 *buf, static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature, u8 selector, u8 port_idx) { - int result = -EINVAL; struct device *dev = wusbhc->dev; - d_fnstart(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d)\n", - feature, selector, port_idx); - if (port_idx > wusbhc->ports_max) - goto error; + return -EINVAL; switch (feature) { /* According to USB2.0[11.24.2.13]p2, these features @@ -255,35 +246,27 @@ static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature, case USB_PORT_FEAT_C_SUSPEND: case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: - result = 0; - break; - + return 0; case USB_PORT_FEAT_POWER: /* No such thing, but we fake it works */ mutex_lock(&wusbhc->mutex); wusb_port_by_idx(wusbhc, port_idx)->status |= USB_PORT_STAT_POWER; mutex_unlock(&wusbhc->mutex); - result = 0; - break; + return 0; case USB_PORT_FEAT_RESET: - result = wusbhc_rh_port_reset(wusbhc, port_idx); - break; + return wusbhc_rh_port_reset(wusbhc, port_idx); case USB_PORT_FEAT_ENABLE: case USB_PORT_FEAT_SUSPEND: dev_err(dev, "(port_idx %d) set feat %d/%d UNIMPLEMENTED\n", port_idx, feature, selector); - result = -ENOSYS; - break; + return -ENOSYS; default: dev_err(dev, "(port_idx %d) set feat %d/%d UNKNOWN\n", port_idx, feature, selector); - result = -EPIPE; - break; + return -EPIPE; } -error: - d_fnend(4, dev, "(feat 0x%04u, selector 0x%u, port_idx %d) = %d\n", - feature, selector, port_idx, result); - return result; + + return 0; } /* @@ -294,17 +277,13 @@ error: static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature, u8 selector, u8 port_idx) { - int result = -EINVAL; + int result = 0; struct device *dev = wusbhc->dev; - d_fnstart(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d)\n", - wusbhc, feature, selector, port_idx); - if (port_idx > wusbhc->ports_max) - goto error; + return -EINVAL; mutex_lock(&wusbhc->mutex); - result = 0; switch (feature) { case USB_PORT_FEAT_POWER: /* fake port always on */ /* According to USB2.0[11.24.2.7.1.4], no need to implement? */ @@ -324,10 +303,8 @@ static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature, break; case USB_PORT_FEAT_SUSPEND: case USB_PORT_FEAT_C_SUSPEND: - case 0xffff: /* ??? FIXME */ dev_err(dev, "(port_idx %d) Clear feat %d/%d UNIMPLEMENTED\n", port_idx, feature, selector); - /* dump_stack(); */ result = -ENOSYS; break; default: @@ -337,9 +314,7 @@ static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature, break; } mutex_unlock(&wusbhc->mutex); -error: - d_fnend(4, dev, "(wusbhc %p feat 0x%04x selector %d port_idx %d) = " - "%d\n", wusbhc, feature, selector, port_idx, result); + return result; } @@ -351,22 +326,17 @@ error: static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx, u32 *_buf, u16 wLength) { - int result = -EINVAL; u16 *buf = (u16 *) _buf; - d_fnstart(1, wusbhc->dev, "(wusbhc %p port_idx %u wLength %u)\n", - wusbhc, port_idx, wLength); if (port_idx > wusbhc->ports_max) - goto error; + return -EINVAL; + mutex_lock(&wusbhc->mutex); buf[0] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->status); buf[1] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->change); - result = 0; mutex_unlock(&wusbhc->mutex); -error: - d_fnend(1, wusbhc->dev, "(wusbhc %p) = %d, buffer:\n", wusbhc, result); - d_dump(1, wusbhc->dev, _buf, wLength); - return result; + + return 0; } /* diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c index a101cad6a8d4..f4aa28eca70d 100644 --- a/drivers/usb/wusbcore/security.c +++ b/drivers/usb/wusbcore/security.c @@ -27,19 +27,6 @@ #include <linux/random.h> #include "wusbhc.h" -/* - * DEBUG & SECURITY WARNING!!!! - * - * If you enable this past 1, the debug code will weaken the - * cryptographic safety of the system (on purpose, for debugging). - * - * Weaken means: - * we print secret keys and intermediate values all the way, - */ -#undef D_LOCAL -#define D_LOCAL 2 -#include <linux/uwb/debug.h> - static void wusbhc_set_gtk_callback(struct urb *urb); static void wusbhc_gtk_rekey_done_work(struct work_struct *work); @@ -219,7 +206,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, const void *itr, *top; char buf[64]; - d_fnstart(3, dev, "(usb_dev %p, wusb_dev %p)\n", usb_dev, wusb_dev); result = usb_get_descriptor(usb_dev, USB_DT_SECURITY, 0, &secd, sizeof(secd)); if (result < sizeof(secd)) { @@ -228,8 +214,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, goto error_secd; } secd_size = le16_to_cpu(secd.wTotalLength); - d_printf(5, dev, "got %d bytes of sec descriptor, total is %d\n", - result, secd_size); secd_buf = kmalloc(secd_size, GFP_KERNEL); if (secd_buf == NULL) { dev_err(dev, "Can't allocate space for security descriptors\n"); @@ -242,7 +226,6 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, "not enough data: %d\n", result); goto error_secd_all; } - d_printf(5, dev, "got %d bytes of sec descriptors\n", result); bytes = 0; itr = secd_buf + sizeof(secd); top = secd_buf + result; @@ -279,14 +262,12 @@ int wusb_dev_sec_add(struct wusbhc *wusbhc, goto error_no_ccm1; } wusb_dev->ccm1_etd = *ccm1_etd; - dev_info(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n", - buf, wusb_et_name(ccm1_etd->bEncryptionType), - ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex); + dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n", + buf, wusb_et_name(ccm1_etd->bEncryptionType), + ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex); result = 0; kfree(secd_buf); out: - d_fnend(3, dev, "(usb_dev %p, wusb_dev %p) = %d\n", - usb_dev, wusb_dev, result); return result; @@ -303,32 +284,6 @@ void wusb_dev_sec_rm(struct wusb_dev *wusb_dev) /* Nothing so far */ } -static void hs_printk(unsigned level, struct device *dev, - struct usb_handshake *hs) -{ - d_printf(level, dev, - " bMessageNumber: %u\n" - " bStatus: %u\n" - " tTKID: %02x %02x %02x\n" - " CDID: %02x %02x %02x %02x %02x %02x %02x %02x\n" - " %02x %02x %02x %02x %02x %02x %02x %02x\n" - " nonce: %02x %02x %02x %02x %02x %02x %02x %02x\n" - " %02x %02x %02x %02x %02x %02x %02x %02x\n" - " MIC: %02x %02x %02x %02x %02x %02x %02x %02x\n", - hs->bMessageNumber, hs->bStatus, - hs->tTKID[2], hs->tTKID[1], hs->tTKID[0], - hs->CDID[0], hs->CDID[1], hs->CDID[2], hs->CDID[3], - hs->CDID[4], hs->CDID[5], hs->CDID[6], hs->CDID[7], - hs->CDID[8], hs->CDID[9], hs->CDID[10], hs->CDID[11], - hs->CDID[12], hs->CDID[13], hs->CDID[14], hs->CDID[15], - hs->nonce[0], hs->nonce[1], hs->nonce[2], hs->nonce[3], - hs->nonce[4], hs->nonce[5], hs->nonce[6], hs->nonce[7], - hs->nonce[8], hs->nonce[9], hs->nonce[10], hs->nonce[11], - hs->nonce[12], hs->nonce[13], hs->nonce[14], hs->nonce[15], - hs->MIC[0], hs->MIC[1], hs->MIC[2], hs->MIC[3], - hs->MIC[4], hs->MIC[5], hs->MIC[6], hs->MIC[7]); -} - /** * Update the address of an unauthenticated WUSB device * @@ -338,8 +293,7 @@ static void hs_printk(unsigned level, struct device *dev, * Before the device's address (as known by it) was usb_dev->devnum | * 0x80 (unauthenticated address). With this we update it to usb_dev->devnum. */ -static int wusb_dev_update_address(struct wusbhc *wusbhc, - struct wusb_dev *wusb_dev) +int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev) { int result = -ENOMEM; struct usb_device *usb_dev = wusb_dev->usb_dev; @@ -422,9 +376,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, get_random_bytes(&hs[0].nonce, sizeof(hs[0].nonce)); memset(hs[0].MIC, 0, sizeof(hs[0].MIC)); /* Per WUSB1.0[T7-22] */ - d_printf(1, dev, "I: sending hs1:\n"); - hs_printk(2, dev, &hs[0]); - result = usb_control_msg( usb_dev, usb_sndctrlpipe(usb_dev, 0), USB_REQ_SET_HANDSHAKE, @@ -445,8 +396,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, dev_err(dev, "Handshake2: request failed: %d\n", result); goto error_hs2; } - d_printf(1, dev, "got HS2:\n"); - hs_printk(2, dev, &hs[1]); result = -EINVAL; if (hs[1].bMessageNumber != 2) { @@ -487,10 +436,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, result); goto error_hs2; } - d_printf(2, dev, "KCK:\n"); - d_dump(2, dev, keydvt_out.kck, sizeof(keydvt_out.kck)); - d_printf(2, dev, "PTK:\n"); - d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk)); /* Compute MIC and verify it */ result = wusb_oob_mic(mic, keydvt_out.kck, &ccm_n, &hs[1]); @@ -500,8 +445,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, goto error_hs2; } - d_printf(2, dev, "MIC:\n"); - d_dump(2, dev, mic, sizeof(mic)); if (memcmp(hs[1].MIC, mic, sizeof(hs[1].MIC))) { dev_err(dev, "Handshake2 failed: MIC mismatch\n"); goto error_hs2; @@ -521,9 +464,6 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, goto error_hs2; } - d_printf(1, dev, "I: sending hs3:\n"); - hs_printk(2, dev, &hs[2]); - result = usb_control_msg( usb_dev, usb_sndctrlpipe(usb_dev, 0), USB_REQ_SET_HANDSHAKE, @@ -534,14 +474,11 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, goto error_hs3; } - d_printf(1, dev, "I: turning on encryption on host for device\n"); - d_dump(2, dev, keydvt_out.ptk, sizeof(keydvt_out.ptk)); result = wusbhc->set_ptk(wusbhc, wusb_dev->port_idx, tkid, keydvt_out.ptk, sizeof(keydvt_out.ptk)); if (result < 0) goto error_wusbhc_set_ptk; - d_printf(1, dev, "I: setting a GTK\n"); result = wusb_dev_set_gtk(wusbhc, wusb_dev); if (result < 0) { dev_err(dev, "Set GTK for device: request failed: %d\n", @@ -551,13 +488,12 @@ int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, /* Update the device's address from unauth to auth */ if (usb_dev->authenticated == 0) { - d_printf(1, dev, "I: updating addres to auth from non-auth\n"); result = wusb_dev_update_address(wusbhc, wusb_dev); if (result < 0) goto error_dev_update_address; } result = 0; - d_printf(1, dev, "I: 4way handshke done, device authenticated\n"); + dev_info(dev, "device authenticated\n"); error_dev_update_address: error_wusbhc_set_gtk: @@ -570,10 +506,8 @@ error_hs1: memset(&keydvt_in, 0, sizeof(keydvt_in)); memset(&ccm_n, 0, sizeof(ccm_n)); memset(mic, 0, sizeof(mic)); - if (result < 0) { - /* error path */ + if (result < 0) wusb_dev_set_encryption(usb_dev, 0); - } error_dev_set_encryption: kfree(hs); error_kzalloc: diff --git a/drivers/usb/wusbcore/wa-nep.c b/drivers/usb/wusbcore/wa-nep.c index 3f542990c73f..17d2626038be 100644 --- a/drivers/usb/wusbcore/wa-nep.c +++ b/drivers/usb/wusbcore/wa-nep.c @@ -51,7 +51,7 @@ */ #include <linux/workqueue.h> #include <linux/ctype.h> -#include <linux/uwb/debug.h> + #include "wa-hc.h" #include "wusbhc.h" @@ -139,13 +139,10 @@ static void wa_notif_dispatch(struct work_struct *ws) /* FIXME: unimplemented WA NOTIFs */ /* fallthru */ default: - if (printk_ratelimit()) { - dev_err(dev, "HWA: unknown notification 0x%x, " - "%zu bytes; discarding\n", - notif_hdr->bNotifyType, - (size_t)notif_hdr->bLength); - dump_bytes(dev, notif_hdr, 16); - } + dev_err(dev, "HWA: unknown notification 0x%x, " + "%zu bytes; discarding\n", + notif_hdr->bNotifyType, + (size_t)notif_hdr->bLength); break; } } @@ -160,12 +157,9 @@ out: * discard the data, as this should not happen. */ exhausted_buffer: - if (!printk_ratelimit()) - goto out; dev_warn(dev, "HWA: device sent short notification, " "%d bytes missing; discarding %d bytes.\n", missing, (int)size); - dump_bytes(dev, itr, size); goto out; } diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c index f18e4aae66e9..7369655f69cd 100644 --- a/drivers/usb/wusbcore/wa-rpipe.c +++ b/drivers/usb/wusbcore/wa-rpipe.c @@ -60,13 +60,10 @@ #include <linux/init.h> #include <asm/atomic.h> #include <linux/bitmap.h> + #include "wusbhc.h" #include "wa-hc.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - - static int __rpipe_get_descr(struct wahc *wa, struct usb_rpipe_descriptor *descr, u16 index) { @@ -76,7 +73,6 @@ static int __rpipe_get_descr(struct wahc *wa, /* Get the RPIPE descriptor -- we cannot use the usb_get_descriptor() * function because the arguments are different. */ - d_printf(1, dev, "rpipe %u: get descr\n", index); result = usb_control_msg( wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0), USB_REQ_GET_DESCRIPTOR, @@ -115,7 +111,6 @@ static int __rpipe_set_descr(struct wahc *wa, /* we cannot use the usb_get_descriptor() function because the * arguments are different. */ - d_printf(1, dev, "rpipe %u: set descr\n", index); result = usb_control_msg( wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0), USB_REQ_SET_DESCRIPTOR, @@ -174,13 +169,12 @@ void rpipe_destroy(struct kref *_rpipe) { struct wa_rpipe *rpipe = container_of(_rpipe, struct wa_rpipe, refcnt); u8 index = le16_to_cpu(rpipe->descr.wRPipeIndex); - d_fnstart(1, NULL, "(rpipe %p %u)\n", rpipe, index); + if (rpipe->ep) rpipe->ep->hcpriv = NULL; rpipe_put_idx(rpipe->wa, index); wa_put(rpipe->wa); kfree(rpipe); - d_fnend(1, NULL, "(rpipe %p %u)\n", rpipe, index); } EXPORT_SYMBOL_GPL(rpipe_destroy); @@ -202,7 +196,6 @@ static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs, struct wa_rpipe *rpipe; struct device *dev = &wa->usb_iface->dev; - d_fnstart(3, dev, "(wa %p crs 0x%02x)\n", wa, crs); rpipe = kzalloc(sizeof(*rpipe), gfp); if (rpipe == NULL) return -ENOMEM; @@ -223,14 +216,12 @@ static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs, } *prpipe = NULL; kfree(rpipe); - d_fnend(3, dev, "(wa %p crs 0x%02x) = -ENXIO\n", wa, crs); return -ENXIO; found: set_bit(rpipe_idx, wa->rpipe_bm); rpipe->wa = wa_get(wa); *prpipe = rpipe; - d_fnstart(3, dev, "(wa %p crs 0x%02x) = 0\n", wa, crs); return 0; } @@ -239,7 +230,6 @@ static int __rpipe_reset(struct wahc *wa, unsigned index) int result; struct device *dev = &wa->usb_iface->dev; - d_printf(1, dev, "rpipe %u: reset\n", index); result = usb_control_msg( wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0), USB_REQ_RPIPE_RESET, @@ -276,7 +266,6 @@ static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find( struct usb_descriptor_header *hdr; struct usb_wireless_ep_comp_descriptor *epcd; - d_fnstart(3, dev, "(ep %p)\n", ep); if (ep->desc.bEndpointAddress == 0) { epcd = &epc0; goto out; @@ -310,7 +299,6 @@ static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find( itr_size -= hdr->bDescriptorType; } out: - d_fnend(3, dev, "(ep %p) = %p\n", ep, epcd); return epcd; } @@ -329,8 +317,6 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa, struct usb_wireless_ep_comp_descriptor *epcd; u8 unauth; - d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n", - rpipe, wa, ep, urb); epcd = rpipe_epc_find(dev, ep); if (epcd == NULL) { dev_err(dev, "ep 0x%02x: can't find companion descriptor\n", @@ -350,10 +336,12 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa, /* FIXME: use maximum speed as supported or recommended by device */ rpipe->descr.bSpeed = usb_pipeendpoint(urb->pipe) == 0 ? UWB_PHY_RATE_53 : UWB_PHY_RATE_200; - d_printf(2, dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n", - urb->dev->devnum, urb->dev->devnum | unauth, - le16_to_cpu(rpipe->descr.wRPipeIndex), - usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed); + + dev_dbg(dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n", + urb->dev->devnum, urb->dev->devnum | unauth, + le16_to_cpu(rpipe->descr.wRPipeIndex), + usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed); + /* see security.c:wusb_update_address() */ if (unlikely(urb->dev->devnum == 0x80)) rpipe->descr.bDeviceAddress = 0; @@ -384,8 +372,6 @@ static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa, } result = 0; error: - d_fnend(3, dev, "(rpipe %p wa %p ep %p urb %p) = %d\n", - rpipe, wa, ep, urb, result); return result; } @@ -405,8 +391,6 @@ static int rpipe_check_aim(const struct wa_rpipe *rpipe, const struct wahc *wa, u8 unauth = (usb_dev->wusb && !usb_dev->authenticated) ? 0x80 : 0; u8 portnum = wusb_port_no_to_idx(urb->dev->portnum); - d_fnstart(3, dev, "(rpipe %p wa %p ep %p, urb %p)\n", - rpipe, wa, ep, urb); #define AIM_CHECK(rdf, val, text) \ do { \ if (rpipe->descr.rdf != (val)) { \ @@ -451,8 +435,6 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep, struct wa_rpipe *rpipe; u8 eptype; - d_fnstart(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb, - gfp); mutex_lock(&wa->rpipe_mutex); rpipe = ep->hcpriv; if (rpipe != NULL) { @@ -462,9 +444,9 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep, goto error; } __rpipe_get(rpipe); - d_printf(2, dev, "ep 0x%02x: reusing rpipe %u\n", - ep->desc.bEndpointAddress, - le16_to_cpu(rpipe->descr.wRPipeIndex)); + dev_dbg(dev, "ep 0x%02x: reusing rpipe %u\n", + ep->desc.bEndpointAddress, + le16_to_cpu(rpipe->descr.wRPipeIndex)); } else { /* hmm, assign idle rpipe, aim it */ result = -ENOBUFS; @@ -480,14 +462,12 @@ int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep, ep->hcpriv = rpipe; rpipe->ep = ep; __rpipe_get(rpipe); /* for caching into ep->hcpriv */ - d_printf(2, dev, "ep 0x%02x: using rpipe %u\n", - ep->desc.bEndpointAddress, - le16_to_cpu(rpipe->descr.wRPipeIndex)); + dev_dbg(dev, "ep 0x%02x: using rpipe %u\n", + ep->desc.bEndpointAddress, + le16_to_cpu(rpipe->descr.wRPipeIndex)); } - d_dump(4, dev, &rpipe->descr, sizeof(rpipe->descr)); error: mutex_unlock(&wa->rpipe_mutex); - d_fnend(3, dev, "(wa %p ep %p urb %p gfp 0x%08x)\n", wa, ep, urb, gfp); return result; } @@ -507,7 +487,7 @@ int wa_rpipes_create(struct wahc *wa) void wa_rpipes_destroy(struct wahc *wa) { struct device *dev = &wa->usb_iface->dev; - d_fnstart(3, dev, "(wa %p)\n", wa); + if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) { char buf[256]; WARN_ON(1); @@ -515,7 +495,6 @@ void wa_rpipes_destroy(struct wahc *wa) dev_err(dev, "BUG: pipes not released on exit: %s\n", buf); } kfree(wa->rpipe_bm); - d_fnend(3, dev, "(wa %p)\n", wa); } /* @@ -530,33 +509,20 @@ void wa_rpipes_destroy(struct wahc *wa) */ void rpipe_ep_disable(struct wahc *wa, struct usb_host_endpoint *ep) { - struct device *dev = &wa->usb_iface->dev; struct wa_rpipe *rpipe; - d_fnstart(2, dev, "(wa %p ep %p)\n", wa, ep); + mutex_lock(&wa->rpipe_mutex); rpipe = ep->hcpriv; if (rpipe != NULL) { - unsigned rc = atomic_read(&rpipe->refcnt.refcount); - int result; u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex); - if (rc != 1) - d_printf(1, dev, "(wa %p ep %p) rpipe %p refcnt %u\n", - wa, ep, rpipe, rc); - - d_printf(1, dev, "rpipe %u: abort\n", index); - result = usb_control_msg( + usb_control_msg( wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0), USB_REQ_RPIPE_ABORT, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE, 0, index, NULL, 0, 1000 /* FIXME: arbitrary */); - if (result < 0 && result != -ENODEV /* dev is gone */) - d_printf(1, dev, "(wa %p rpipe %u): abort failed: %d\n", - wa, index, result); rpipe_put(rpipe); } mutex_unlock(&wa->rpipe_mutex); - d_fnend(2, dev, "(wa %p ep %p)\n", wa, ep); - return; } EXPORT_SYMBOL_GPL(rpipe_ep_disable); diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c index c038635d1c64..238a96aee3a1 100644 --- a/drivers/usb/wusbcore/wa-xfer.c +++ b/drivers/usb/wusbcore/wa-xfer.c @@ -82,13 +82,10 @@ #include <linux/init.h> #include <linux/spinlock.h> #include <linux/hash.h> + #include "wa-hc.h" #include "wusbhc.h" -#undef D_LOCAL -#define D_LOCAL 0 /* 0 disabled, > 0 different levels... */ -#include <linux/uwb/debug.h> - enum { WA_SEGS_MAX = 255, }; @@ -180,7 +177,6 @@ static void wa_xfer_destroy(struct kref *_xfer) } } kfree(xfer); - d_printf(2, NULL, "xfer %p destroyed\n", xfer); } static void wa_xfer_get(struct wa_xfer *xfer) @@ -190,10 +186,7 @@ static void wa_xfer_get(struct wa_xfer *xfer) static void wa_xfer_put(struct wa_xfer *xfer) { - d_fnstart(3, NULL, "(xfer %p) -- ref count bef put %d\n", - xfer, atomic_read(&xfer->refcnt.refcount)); kref_put(&xfer->refcnt, wa_xfer_destroy); - d_fnend(3, NULL, "(xfer %p) = void\n", xfer); } /* @@ -209,7 +202,7 @@ static void wa_xfer_put(struct wa_xfer *xfer) static void wa_xfer_giveback(struct wa_xfer *xfer) { unsigned long flags; - d_fnstart(3, NULL, "(xfer %p)\n", xfer); + spin_lock_irqsave(&xfer->wa->xfer_list_lock, flags); list_del_init(&xfer->list_node); spin_unlock_irqrestore(&xfer->wa->xfer_list_lock, flags); @@ -217,7 +210,6 @@ static void wa_xfer_giveback(struct wa_xfer *xfer) wusbhc_giveback_urb(xfer->wa->wusb, xfer->urb, xfer->result); wa_put(xfer->wa); wa_xfer_put(xfer); - d_fnend(3, NULL, "(xfer %p) = void\n", xfer); } /* @@ -227,13 +219,10 @@ static void wa_xfer_giveback(struct wa_xfer *xfer) */ static void wa_xfer_completion(struct wa_xfer *xfer) { - d_fnstart(3, NULL, "(xfer %p)\n", xfer); if (xfer->wusb_dev) wusb_dev_put(xfer->wusb_dev); rpipe_put(xfer->ep->hcpriv); wa_xfer_giveback(xfer); - d_fnend(3, NULL, "(xfer %p) = void\n", xfer); - return; } /* @@ -243,12 +232,12 @@ static void wa_xfer_completion(struct wa_xfer *xfer) */ static unsigned __wa_xfer_is_done(struct wa_xfer *xfer) { + struct device *dev = &xfer->wa->usb_iface->dev; unsigned result, cnt; struct wa_seg *seg; struct urb *urb = xfer->urb; unsigned found_short = 0; - d_fnstart(3, NULL, "(xfer %p)\n", xfer); result = xfer->segs_done == xfer->segs_submitted; if (result == 0) goto out; @@ -258,10 +247,8 @@ static unsigned __wa_xfer_is_done(struct wa_xfer *xfer) switch (seg->status) { case WA_SEG_DONE: if (found_short && seg->result > 0) { - if (printk_ratelimit()) - printk(KERN_ERR "xfer %p#%u: bad short " - "segments (%zu)\n", xfer, cnt, - seg->result); + dev_dbg(dev, "xfer %p#%u: bad short segments (%zu)\n", + xfer, cnt, seg->result); urb->status = -EINVAL; goto out; } @@ -269,36 +256,30 @@ static unsigned __wa_xfer_is_done(struct wa_xfer *xfer) if (seg->result < xfer->seg_size && cnt != xfer->segs-1) found_short = 1; - d_printf(2, NULL, "xfer %p#%u: DONE short %d " - "result %zu urb->actual_length %d\n", - xfer, seg->index, found_short, seg->result, - urb->actual_length); + dev_dbg(dev, "xfer %p#%u: DONE short %d " + "result %zu urb->actual_length %d\n", + xfer, seg->index, found_short, seg->result, + urb->actual_length); break; case WA_SEG_ERROR: xfer->result = seg->result; - d_printf(2, NULL, "xfer %p#%u: ERROR result %zu\n", - xfer, seg->index, seg->result); + dev_dbg(dev, "xfer %p#%u: ERROR result %zu\n", + xfer, seg->index, seg->result); goto out; case WA_SEG_ABORTED: - WARN_ON(urb->status != -ECONNRESET - && urb->status != -ENOENT); - d_printf(2, NULL, "xfer %p#%u ABORTED: result %d\n", - xfer, seg->index, urb->status); + dev_dbg(dev, "xfer %p#%u ABORTED: result %d\n", + xfer, seg->index, urb->status); xfer->result = urb->status; goto out; default: - /* if (printk_ratelimit()) */ - printk(KERN_ERR "xfer %p#%u: " - "is_done bad state %d\n", - xfer, cnt, seg->status); + dev_warn(dev, "xfer %p#%u: is_done bad state %d\n", + xfer, cnt, seg->status); xfer->result = -EINVAL; - WARN_ON(1); goto out; } } xfer->result = 0; out: - d_fnend(3, NULL, "(xfer %p) = void\n", xfer); return result; } @@ -424,8 +405,6 @@ static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer, struct urb *urb = xfer->urb; struct wa_rpipe *rpipe = xfer->ep->hcpriv; - d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n", - xfer, rpipe, urb); switch (rpipe->descr.bmAttribute & 0x3) { case USB_ENDPOINT_XFER_CONTROL: *pxfer_type = WA_XFER_TYPE_CTL; @@ -472,12 +451,10 @@ static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer, if (xfer->segs == 0 && *pxfer_type == WA_XFER_TYPE_CTL) xfer->segs = 1; error: - d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n", - xfer, rpipe, urb, (int)result); return result; } -/** Fill in the common request header and xfer-type specific data. */ +/* Fill in the common request header and xfer-type specific data. */ static void __wa_xfer_setup_hdr0(struct wa_xfer *xfer, struct wa_xfer_hdr *xfer_hdr0, enum wa_xfer_type xfer_type, @@ -534,14 +511,13 @@ static void wa_seg_dto_cb(struct urb *urb) unsigned rpipe_ready = 0; u8 done = 0; - d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status); switch (urb->status) { case 0: spin_lock_irqsave(&xfer->lock, flags); wa = xfer->wa; dev = &wa->usb_iface->dev; - d_printf(2, dev, "xfer %p#%u: data out done (%d bytes)\n", - xfer, seg->index, urb->actual_length); + dev_dbg(dev, "xfer %p#%u: data out done (%d bytes)\n", + xfer, seg->index, urb->actual_length); if (seg->status < WA_SEG_PENDING) seg->status = WA_SEG_PENDING; seg->result = urb->actual_length; @@ -555,9 +531,8 @@ static void wa_seg_dto_cb(struct urb *urb) wa = xfer->wa; dev = &wa->usb_iface->dev; rpipe = xfer->ep->hcpriv; - if (printk_ratelimit()) - dev_err(dev, "xfer %p#%u: data out error %d\n", - xfer, seg->index, urb->status); + dev_dbg(dev, "xfer %p#%u: data out error %d\n", + xfer, seg->index, urb->status); if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)){ dev_err(dev, "DTO: URB max acceptable errors " @@ -578,7 +553,6 @@ static void wa_seg_dto_cb(struct urb *urb) if (rpipe_ready) wa_xfer_delayed_run(rpipe); } - d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status); } /* @@ -610,14 +584,12 @@ static void wa_seg_cb(struct urb *urb) unsigned rpipe_ready; u8 done = 0; - d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status); switch (urb->status) { case 0: spin_lock_irqsave(&xfer->lock, flags); wa = xfer->wa; dev = &wa->usb_iface->dev; - d_printf(2, dev, "xfer %p#%u: request done\n", - xfer, seg->index); + dev_dbg(dev, "xfer %p#%u: request done\n", xfer, seg->index); if (xfer->is_inbound && seg->status < WA_SEG_PENDING) seg->status = WA_SEG_PENDING; spin_unlock_irqrestore(&xfer->lock, flags); @@ -652,7 +624,6 @@ static void wa_seg_cb(struct urb *urb) if (rpipe_ready) wa_xfer_delayed_run(rpipe); } - d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status); } /* @@ -750,9 +721,6 @@ static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb) size_t xfer_hdr_size, cnt, transfer_size; struct wa_xfer_hdr *xfer_hdr0, *xfer_hdr; - d_fnstart(3, dev, "(xfer %p [rpipe %p] urb %p)\n", - xfer, xfer->ep->hcpriv, urb); - result = __wa_xfer_setup_sizes(xfer, &xfer_type); if (result < 0) goto error_setup_sizes; @@ -788,8 +756,6 @@ static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb) result = 0; error_setup_segs: error_setup_sizes: - d_fnend(3, dev, "(xfer %p [rpipe %p] urb %p) = %d\n", - xfer, xfer->ep->hcpriv, urb, result); return result; } @@ -843,9 +809,6 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe) struct wa_xfer *xfer; unsigned long flags; - d_fnstart(1, dev, "(rpipe #%d) %d segments available\n", - le16_to_cpu(rpipe->descr.wRPipeIndex), - atomic_read(&rpipe->segs_available)); spin_lock_irqsave(&rpipe->seg_lock, flags); while (atomic_read(&rpipe->segs_available) > 0 && !list_empty(&rpipe->seg_list)) { @@ -854,10 +817,8 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe) list_del(&seg->list_node); xfer = seg->xfer; result = __wa_seg_submit(rpipe, xfer, seg); - d_printf(1, dev, "xfer %p#%u submitted from delayed " - "[%d segments available] %d\n", - xfer, seg->index, - atomic_read(&rpipe->segs_available), result); + dev_dbg(dev, "xfer %p#%u submitted from delayed [%d segments available] %d\n", + xfer, seg->index, atomic_read(&rpipe->segs_available), result); if (unlikely(result < 0)) { spin_unlock_irqrestore(&rpipe->seg_lock, flags); spin_lock_irqsave(&xfer->lock, flags); @@ -868,10 +829,6 @@ static void wa_xfer_delayed_run(struct wa_rpipe *rpipe) } } spin_unlock_irqrestore(&rpipe->seg_lock, flags); - d_fnend(1, dev, "(rpipe #%d) = void, %d segments available\n", - le16_to_cpu(rpipe->descr.wRPipeIndex), - atomic_read(&rpipe->segs_available)); - } /* @@ -894,9 +851,6 @@ static int __wa_xfer_submit(struct wa_xfer *xfer) u8 available; u8 empty; - d_fnstart(3, dev, "(xfer %p [rpipe %p])\n", - xfer, xfer->ep->hcpriv); - spin_lock_irqsave(&wa->xfer_list_lock, flags); list_add_tail(&xfer->list_node, &wa->xfer_list); spin_unlock_irqrestore(&wa->xfer_list_lock, flags); @@ -908,30 +862,24 @@ static int __wa_xfer_submit(struct wa_xfer *xfer) available = atomic_read(&rpipe->segs_available); empty = list_empty(&rpipe->seg_list); seg = xfer->seg[cnt]; - d_printf(2, dev, "xfer %p#%u: available %u empty %u (%s)\n", - xfer, cnt, available, empty, - available == 0 || !empty ? "delayed" : "submitted"); + dev_dbg(dev, "xfer %p#%u: available %u empty %u (%s)\n", + xfer, cnt, available, empty, + available == 0 || !empty ? "delayed" : "submitted"); if (available == 0 || !empty) { - d_printf(1, dev, "xfer %p#%u: delayed\n", xfer, cnt); + dev_dbg(dev, "xfer %p#%u: delayed\n", xfer, cnt); seg->status = WA_SEG_DELAYED; list_add_tail(&seg->list_node, &rpipe->seg_list); } else { result = __wa_seg_submit(rpipe, xfer, seg); - if (result < 0) + if (result < 0) { + __wa_xfer_abort(xfer); goto error_seg_submit; + } } xfer->segs_submitted++; } - spin_unlock_irqrestore(&rpipe->seg_lock, flags); - d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer, - xfer->ep->hcpriv); - return result; - error_seg_submit: - __wa_xfer_abort(xfer); spin_unlock_irqrestore(&rpipe->seg_lock, flags); - d_fnend(3, dev, "(xfer %p [rpipe %p]) = void\n", xfer, - xfer->ep->hcpriv); return result; } @@ -964,11 +912,9 @@ static void wa_urb_enqueue_b(struct wa_xfer *xfer) struct urb *urb = xfer->urb; struct wahc *wa = xfer->wa; struct wusbhc *wusbhc = wa->wusb; - struct device *dev = &wa->usb_iface->dev; struct wusb_dev *wusb_dev; unsigned done; - d_fnstart(3, dev, "(wa %p urb %p)\n", wa, urb); result = rpipe_get_by_ep(wa, xfer->ep, urb, xfer->gfp); if (result < 0) goto error_rpipe_get; @@ -997,7 +943,6 @@ static void wa_urb_enqueue_b(struct wa_xfer *xfer) if (result < 0) goto error_xfer_submit; spin_unlock_irqrestore(&xfer->lock, flags); - d_fnend(3, dev, "(wa %p urb %p) = void\n", wa, urb); return; /* this is basically wa_xfer_completion() broken up wa_xfer_giveback() @@ -1015,7 +960,6 @@ error_dev_gone: error_rpipe_get: xfer->result = result; wa_xfer_giveback(xfer); - d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result); return; error_xfer_submit: @@ -1024,8 +968,6 @@ error_xfer_submit: spin_unlock_irqrestore(&xfer->lock, flags); if (done) wa_xfer_completion(xfer); - d_fnend(3, dev, "(wa %p urb %p) = (void) %d\n", wa, urb, result); - return; } /* @@ -1041,11 +983,9 @@ error_xfer_submit: void wa_urb_enqueue_run(struct work_struct *ws) { struct wahc *wa = container_of(ws, struct wahc, xfer_work); - struct device *dev = &wa->usb_iface->dev; struct wa_xfer *xfer, *next; struct urb *urb; - d_fnstart(3, dev, "(wa %p)\n", wa); spin_lock_irq(&wa->xfer_list_lock); list_for_each_entry_safe(xfer, next, &wa->xfer_delayed_list, list_node) { @@ -1059,7 +999,6 @@ void wa_urb_enqueue_run(struct work_struct *ws) spin_lock_irq(&wa->xfer_list_lock); } spin_unlock_irq(&wa->xfer_list_lock); - d_fnend(3, dev, "(wa %p) = void\n", wa); } EXPORT_SYMBOL_GPL(wa_urb_enqueue_run); @@ -1084,9 +1023,6 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep, unsigned long my_flags; unsigned cant_sleep = irqs_disabled() | in_atomic(); - d_fnstart(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x)\n", - wa, ep, urb, urb->transfer_buffer_length, gfp); - if (urb->transfer_buffer == NULL && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) && urb->transfer_buffer_length != 0) { @@ -1108,11 +1044,13 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep, xfer->gfp = gfp; xfer->ep = ep; urb->hcpriv = xfer; - d_printf(2, dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n", - xfer, urb, urb->pipe, urb->transfer_buffer_length, - urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma", - urb->pipe & USB_DIR_IN ? "inbound" : "outbound", - cant_sleep ? "deferred" : "inline"); + + dev_dbg(dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n", + xfer, urb, urb->pipe, urb->transfer_buffer_length, + urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma", + urb->pipe & USB_DIR_IN ? "inbound" : "outbound", + cant_sleep ? "deferred" : "inline"); + if (cant_sleep) { usb_get_urb(urb); spin_lock_irqsave(&wa->xfer_list_lock, my_flags); @@ -1122,15 +1060,11 @@ int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep, } else { wa_urb_enqueue_b(xfer); } - d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = 0\n", - wa, ep, urb, urb->transfer_buffer_length, gfp); return 0; error_dequeued: kfree(xfer); error_kmalloc: - d_fnend(3, dev, "(wa %p ep %p urb %p [%d] gfp 0x%x) = %d\n", - wa, ep, urb, urb->transfer_buffer_length, gfp, result); return result; } EXPORT_SYMBOL_GPL(wa_urb_enqueue); @@ -1155,7 +1089,6 @@ EXPORT_SYMBOL_GPL(wa_urb_enqueue); */ int wa_urb_dequeue(struct wahc *wa, struct urb *urb) { - struct device *dev = &wa->usb_iface->dev; unsigned long flags, flags2; struct wa_xfer *xfer; struct wa_seg *seg; @@ -1163,9 +1096,6 @@ int wa_urb_dequeue(struct wahc *wa, struct urb *urb) unsigned cnt; unsigned rpipe_ready = 0; - d_fnstart(3, dev, "(wa %p, urb %p)\n", wa, urb); - - d_printf(1, dev, "xfer %p urb %p: aborting\n", urb->hcpriv, urb); xfer = urb->hcpriv; if (xfer == NULL) { /* NOthing setup yet enqueue will see urb->status != @@ -1234,13 +1164,11 @@ int wa_urb_dequeue(struct wahc *wa, struct urb *urb) wa_xfer_completion(xfer); if (rpipe_ready) wa_xfer_delayed_run(rpipe); - d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb); return 0; out_unlock: spin_unlock_irqrestore(&xfer->lock, flags); out: - d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb); return 0; dequeue_delayed: @@ -1250,7 +1178,6 @@ dequeue_delayed: spin_unlock_irqrestore(&xfer->lock, flags); wa_xfer_giveback(xfer); usb_put_urb(urb); /* we got a ref in enqueue() */ - d_fnend(3, dev, "(wa %p, urb %p) = 0\n", wa, urb); return 0; } EXPORT_SYMBOL_GPL(wa_urb_dequeue); @@ -1326,7 +1253,6 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer) u8 usb_status; unsigned rpipe_ready = 0; - d_fnstart(3, dev, "(wa %p xfer %p)\n", wa, xfer); spin_lock_irqsave(&xfer->lock, flags); seg_idx = xfer_result->bTransferSegment & 0x7f; if (unlikely(seg_idx >= xfer->segs)) @@ -1334,8 +1260,8 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer) seg = xfer->seg[seg_idx]; rpipe = xfer->ep->hcpriv; usb_status = xfer_result->bTransferStatus; - d_printf(2, dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n", - xfer, seg_idx, usb_status, seg->status); + dev_dbg(dev, "xfer %p#%u: bTransferStatus 0x%02x (seg %u)\n", + xfer, seg_idx, usb_status, seg->status); if (seg->status == WA_SEG_ABORTED || seg->status == WA_SEG_ERROR) /* already handled */ goto segment_aborted; @@ -1391,10 +1317,8 @@ static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer) wa_xfer_completion(xfer); if (rpipe_ready) wa_xfer_delayed_run(rpipe); - d_fnend(3, dev, "(wa %p xfer %p) = void\n", wa, xfer); return; - error_submit_buf_in: if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) { dev_err(dev, "DTI: URB max acceptable errors " @@ -1416,11 +1340,8 @@ error_complete: wa_xfer_completion(xfer); if (rpipe_ready) wa_xfer_delayed_run(rpipe); - d_fnend(3, dev, "(wa %p xfer %p) = void [segment/DTI-submit error]\n", - wa, xfer); return; - error_bad_seg: spin_unlock_irqrestore(&xfer->lock, flags); wa_urb_dequeue(wa, xfer->urb); @@ -1431,17 +1352,11 @@ error_bad_seg: "exceeded, resetting device\n"); wa_reset_all(wa); } - d_fnend(3, dev, "(wa %p xfer %p) = void [bad seg]\n", wa, xfer); return; - segment_aborted: /* nothing to do, as the aborter did the completion */ spin_unlock_irqrestore(&xfer->lock, flags); - d_fnend(3, dev, "(wa %p xfer %p) = void [segment aborted]\n", - wa, xfer); - return; - } /* @@ -1465,15 +1380,14 @@ static void wa_buf_in_cb(struct urb *urb) unsigned long flags; u8 done = 0; - d_fnstart(3, NULL, "(urb %p [%d])\n", urb, urb->status); switch (urb->status) { case 0: spin_lock_irqsave(&xfer->lock, flags); wa = xfer->wa; dev = &wa->usb_iface->dev; rpipe = xfer->ep->hcpriv; - d_printf(2, dev, "xfer %p#%u: data in done (%zu bytes)\n", - xfer, seg->index, (size_t)urb->actual_length); + dev_dbg(dev, "xfer %p#%u: data in done (%zu bytes)\n", + xfer, seg->index, (size_t)urb->actual_length); seg->status = WA_SEG_DONE; seg->result = urb->actual_length; xfer->segs_done++; @@ -1514,7 +1428,6 @@ static void wa_buf_in_cb(struct urb *urb) if (rpipe_ready) wa_xfer_delayed_run(rpipe); } - d_fnend(3, NULL, "(urb %p [%d]) = void\n", urb, urb->status); } /* @@ -1553,14 +1466,12 @@ static void wa_xfer_result_cb(struct urb *urb) struct wa_xfer *xfer; u8 usb_status; - d_fnstart(3, dev, "(%p)\n", wa); BUG_ON(wa->dti_urb != urb); switch (wa->dti_urb->status) { case 0: /* We have a xfer result buffer; check it */ - d_printf(2, dev, "DTI: xfer result %d bytes at %p\n", - urb->actual_length, urb->transfer_buffer); - d_dump(3, dev, urb->transfer_buffer, urb->actual_length); + dev_dbg(dev, "DTI: xfer result %d bytes at %p\n", + urb->actual_length, urb->transfer_buffer); if (wa->dti_urb->actual_length != sizeof(*xfer_result)) { dev_err(dev, "DTI Error: xfer result--bad size " "xfer result (%d bytes vs %zu needed)\n", @@ -1622,7 +1533,6 @@ static void wa_xfer_result_cb(struct urb *urb) wa_reset_all(wa); } out: - d_fnend(3, dev, "(%p) = void\n", wa); return; } @@ -1653,7 +1563,6 @@ void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr) struct wa_notif_xfer *notif_xfer; const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd; - d_fnstart(4, dev, "(%p, %p)\n", wa, notif_hdr); notif_xfer = container_of(notif_hdr, struct wa_notif_xfer, hdr); BUG_ON(notif_hdr->bNotifyType != WA_NOTIF_TRANSFER); @@ -1693,7 +1602,6 @@ void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr) goto error_dti_urb_submit; } out: - d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr); return; error_dti_urb_submit: @@ -1704,6 +1612,4 @@ error_buf_in_urb_alloc: error_dti_urb_alloc: error: wa_reset_all(wa); - d_fnend(4, dev, "(%p, %p) = void\n", wa, notif_hdr); - return; } diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h index d0c132434f1b..797c2453a35b 100644 --- a/drivers/usb/wusbcore/wusbhc.h +++ b/drivers/usb/wusbcore/wusbhc.h @@ -64,6 +64,13 @@ #include <linux/uwb.h> #include <linux/usb/wusb.h> +/* + * Time from a WUSB channel stop request to the last transmitted MMC. + * + * This needs to be > 4.096 ms in case no MMCs can be transmitted in + * zone 0. + */ +#define WUSB_CHANNEL_STOP_DELAY_MS 8 /** * Wireless USB device @@ -147,7 +154,6 @@ struct wusb_port { u16 status; u16 change; struct wusb_dev *wusb_dev; /* connected device's info */ - unsigned reset_count; u32 ptk_tkid; }; @@ -198,21 +204,18 @@ struct wusb_port { * @mmcies_max Max number of Information Elements this HC can send * in its MMC. Read-only. * + * @start Start the WUSB channel. + * + * @stop Stop the WUSB channel after the specified number of + * milliseconds. Channel Stop IEs should be transmitted + * as required by [WUSB] 4.16.2.1. + * * @mmcie_add HC specific operation (WHCI or HWA) for adding an * MMCIE. * * @mmcie_rm HC specific operation (WHCI or HWA) for removing an * MMCIE. * - * @enc_types Array which describes the encryptions methods - * supported by the host as described in WUSB1.0 -- - * one entry per supported method. As of WUSB1.0 there - * is only four methods, we make space for eight just in - * case they decide to add some more (and pray they do - * it in sequential order). if 'enc_types[enc_method] - * != 0', then it is supported by the host. enc_method - * is USB_ENC_TYPE*. - * * @set_ptk: Set the PTK and enable encryption for a device. Or, if * the supplied key is NULL, disable encryption for that * device. @@ -249,7 +252,8 @@ struct wusbhc { struct uwb_pal pal; unsigned trust_timeout; /* in jiffies */ - struct wuie_host_info *wuie_host_info; /* Includes CHID */ + struct wusb_ckhdid chid; + struct wuie_host_info *wuie_host_info; struct mutex mutex; /* locks everything else */ u16 cluster_id; /* Wireless USB Cluster ID */ @@ -269,7 +273,7 @@ struct wusbhc { u8 mmcies_max; /* FIXME: make wusbhc_ops? */ int (*start)(struct wusbhc *wusbhc); - void (*stop)(struct wusbhc *wusbhc); + void (*stop)(struct wusbhc *wusbhc, int delay); int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt, u8 handle, struct wuie_hdr *wuie); int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle); @@ -373,20 +377,17 @@ static inline void wusbhc_put(struct wusbhc *wusbhc) usb_put_hcd(&wusbhc->usb_hcd); } -int wusbhc_start(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid); +int wusbhc_start(struct wusbhc *wusbhc); void wusbhc_stop(struct wusbhc *wusbhc); extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *); /* Device connect handling */ extern int wusbhc_devconnect_create(struct wusbhc *); extern void wusbhc_devconnect_destroy(struct wusbhc *); -extern int wusbhc_devconnect_start(struct wusbhc *wusbhc, - const struct wusb_ckhdid *chid); +extern int wusbhc_devconnect_start(struct wusbhc *wusbhc); extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc); -extern int wusbhc_devconnect_auth(struct wusbhc *, u8); extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr, struct wusb_dn_hdr *dn_hdr, size_t size); -extern int wusbhc_dev_reset(struct wusbhc *wusbhc, u8 port); extern void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port); extern int wusb_usb_ncb(struct notifier_block *nb, unsigned long val, void *priv); @@ -432,6 +433,7 @@ extern void wusb_dev_sec_rm(struct wusb_dev *) ; extern int wusb_dev_4way_handshake(struct wusbhc *, struct wusb_dev *, struct wusb_ckhdid *ck); void wusbhc_gtk_rekey(struct wusbhc *wusbhc); +int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev); /* WUSB Cluster ID handling */ diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 257e6908304c..2f98d080fe78 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_UWB_I1480U) += i1480/ uwb-objs := \ address.o \ + allocator.o \ beacon.o \ driver.o \ drp.o \ @@ -13,10 +14,12 @@ uwb-objs := \ drp-ie.o \ est.o \ ie.o \ + ie-rcv.o \ lc-dev.o \ lc-rc.o \ neh.o \ pal.o \ + radio.o \ reset.o \ rsv.o \ scan.o \ diff --git a/drivers/uwb/address.c b/drivers/uwb/address.c index 1664ae5f1706..ad21b1d7218c 100644 --- a/drivers/uwb/address.c +++ b/drivers/uwb/address.c @@ -28,7 +28,7 @@ #include <linux/device.h> #include <linux/random.h> #include <linux/etherdevice.h> -#include <linux/uwb/debug.h> + #include "uwb-internal.h" diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c new file mode 100644 index 000000000000..c8185e6b0cd5 --- /dev/null +++ b/drivers/uwb/allocator.c @@ -0,0 +1,386 @@ +/* + * UWB reservation management. + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/version.h> +#include <linux/kernel.h> +#include <linux/uwb.h> + +#include "uwb-internal.h" + +static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai) +{ + int col, mas, safe_mas, unsafe_mas; + unsigned char *bm = ai->bm; + struct uwb_rsv_col_info *ci = ai->ci; + unsigned char c; + + for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) { + + safe_mas = ci->csi.safe_mas_per_col; + unsafe_mas = ci->csi.unsafe_mas_per_col; + + for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) { + if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) { + + if (safe_mas > 0) { + safe_mas--; + c = UWB_RSV_MAS_SAFE; + } else if (unsafe_mas > 0) { + unsafe_mas--; + c = UWB_RSV_MAS_UNSAFE; + } else { + break; + } + bm[col * UWB_MAS_PER_ZONE + mas] = c; + } + } + } +} + +static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai) +{ + int mas, col, rows; + unsigned char *bm = ai->bm; + struct uwb_rsv_row_info *ri = &ai->ri; + unsigned char c; + + rows = 1; + c = UWB_RSV_MAS_SAFE; + for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) { + if (ri->avail[mas] == 1) { + + if (rows > ri->used_rows) { + break; + } else if (rows > 7) { + c = UWB_RSV_MAS_UNSAFE; + } + + for (col = 0; col < UWB_NUM_ZONES; col++) { + if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) { + bm[col * UWB_NUM_ZONES + mas] = c; + if(c == UWB_RSV_MAS_SAFE) + ai->safe_allocated_mases++; + else + ai->unsafe_allocated_mases++; + } + } + rows++; + } + } + ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases; +} + +/* + * Find the best column set for a given availability, interval, num safe mas and + * num unsafe mas. + * + * The different sets are tried in order as shown below, depending on the interval. + * + * interval = 16 + * deep = 0 + * set 1 -> { 8 } + * deep = 1 + * set 1 -> { 4 } + * set 2 -> { 12 } + * deep = 2 + * set 1 -> { 2 } + * set 2 -> { 6 } + * set 3 -> { 10 } + * set 4 -> { 14 } + * deep = 3 + * set 1 -> { 1 } + * set 2 -> { 3 } + * set 3 -> { 5 } + * set 4 -> { 7 } + * set 5 -> { 9 } + * set 6 -> { 11 } + * set 7 -> { 13 } + * set 8 -> { 15 } + * + * interval = 8 + * deep = 0 + * set 1 -> { 4 12 } + * deep = 1 + * set 1 -> { 2 10 } + * set 2 -> { 6 14 } + * deep = 2 + * set 1 -> { 1 9 } + * set 2 -> { 3 11 } + * set 3 -> { 5 13 } + * set 4 -> { 7 15 } + * + * interval = 4 + * deep = 0 + * set 1 -> { 2 6 10 14 } + * deep = 1 + * set 1 -> { 1 5 9 13 } + * set 2 -> { 3 7 11 15 } + * + * interval = 2 + * deep = 0 + * set 1 -> { 1 3 5 7 9 11 13 15 } + */ +static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, + int num_safe_mas, int num_unsafe_mas) +{ + struct uwb_rsv_col_info *ci = ai->ci; + struct uwb_rsv_col_set_info *csi = &ci->csi; + struct uwb_rsv_col_set_info tmp_csi; + int deep, set, col, start_col_deep, col_start_set; + int start_col, max_mas_in_set, lowest_max_mas_in_deep; + int n_mas; + int found = UWB_RSV_ALLOC_NOT_FOUND; + + tmp_csi.start_col = 0; + start_col_deep = interval; + n_mas = num_unsafe_mas + num_safe_mas; + + for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) { + start_col_deep /= 2; + col_start_set = 0; + lowest_max_mas_in_deep = UWB_MAS_PER_ZONE; + + for (set = 1; set <= (1 << deep); set++) { + max_mas_in_set = 0; + start_col = start_col_deep + col_start_set; + for (col = start_col; col < UWB_NUM_ZONES; col += interval) { + + if (ci[col].max_avail_safe >= num_safe_mas && + ci[col].max_avail_unsafe >= n_mas) { + if (ci[col].highest_mas[n_mas] > max_mas_in_set) + max_mas_in_set = ci[col].highest_mas[n_mas]; + } else { + max_mas_in_set = 0; + break; + } + } + if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) { + lowest_max_mas_in_deep = max_mas_in_set; + + tmp_csi.start_col = start_col; + } + col_start_set += (interval >> deep); + } + + if (lowest_max_mas_in_deep < 8) { + csi->start_col = tmp_csi.start_col; + found = UWB_RSV_ALLOC_FOUND; + break; + } else if ((lowest_max_mas_in_deep > 8) && + (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) && + (found == UWB_RSV_ALLOC_NOT_FOUND)) { + csi->start_col = tmp_csi.start_col; + found = UWB_RSV_ALLOC_FOUND; + } + } + + if (found == UWB_RSV_ALLOC_FOUND) { + csi->interval = interval; + csi->safe_mas_per_col = num_safe_mas; + csi->unsafe_mas_per_col = num_unsafe_mas; + + ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas; + ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas; + ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases; + ai->interval = interval; + } + return found; +} + +static void get_row_descriptors(struct uwb_rsv_alloc_info *ai) +{ + unsigned char *bm = ai->bm; + struct uwb_rsv_row_info *ri = &ai->ri; + int col, mas; + + ri->free_rows = 16; + for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) { + ri->avail[mas] = 1; + for (col = 1; col < UWB_NUM_ZONES; col++) { + if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) { + ri->free_rows--; + ri->avail[mas]=0; + break; + } + } + } +} + +static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci) +{ + int mas; + int block_count = 0, start_block = 0; + int previous_avail = 0; + int available = 0; + int safe_mas_in_row[UWB_MAS_PER_ZONE] = { + 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1, + }; + + rci->max_avail_safe = 0; + + for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) { + if (!bm[column * UWB_NUM_ZONES + mas]) { + available++; + rci->max_avail_unsafe = available; + + rci->highest_mas[available] = mas; + + if (previous_avail) { + block_count++; + if ((block_count > safe_mas_in_row[start_block]) && + (!rci->max_avail_safe)) + rci->max_avail_safe = available - 1; + } else { + previous_avail = 1; + start_block = mas; + block_count = 1; + } + } else { + previous_avail = 0; + } + } + if (!rci->max_avail_safe) + rci->max_avail_safe = rci->max_avail_unsafe; +} + +static void get_column_descriptors(struct uwb_rsv_alloc_info *ai) +{ + unsigned char *bm = ai->bm; + struct uwb_rsv_col_info *ci = ai->ci; + int col; + + for (col = 1; col < UWB_NUM_ZONES; col++) { + uwb_rsv_fill_column_info(bm, col, &ci[col]); + } +} + +static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai) +{ + int n_rows; + int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW; + int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW; + if (ai->min_mas % UWB_USABLE_MAS_PER_ROW) + min_rows++; + for (n_rows = max_rows; n_rows >= min_rows; n_rows--) { + if (n_rows <= ai->ri.free_rows) { + ai->ri.used_rows = n_rows; + ai->interval = 1; /* row reservation */ + uwb_rsv_fill_row_alloc(ai); + return UWB_RSV_ALLOC_FOUND; + } + } + return UWB_RSV_ALLOC_NOT_FOUND; +} + +static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval) +{ + int n_safe, n_unsafe, n_mas; + int n_column = UWB_NUM_ZONES / interval; + int max_per_zone = ai->max_mas / n_column; + int min_per_zone = ai->min_mas / n_column; + + if (ai->min_mas % n_column) + min_per_zone++; + + if (min_per_zone > UWB_MAS_PER_ZONE) { + return UWB_RSV_ALLOC_NOT_FOUND; + } + + if (max_per_zone > UWB_MAS_PER_ZONE) { + max_per_zone = UWB_MAS_PER_ZONE; + } + + for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) { + if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND) + continue; + for (n_safe = n_mas; n_safe >= 0; n_safe--) { + n_unsafe = n_mas - n_safe; + if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) { + uwb_rsv_fill_column_alloc(ai); + return UWB_RSV_ALLOC_FOUND; + } + } + } + return UWB_RSV_ALLOC_NOT_FOUND; +} + +int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, + struct uwb_mas_bm *result) +{ + struct uwb_rsv_alloc_info *ai; + int interval; + int bit_index; + + ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL); + + ai->min_mas = rsv->min_mas; + ai->max_mas = rsv->max_mas; + ai->max_interval = rsv->max_interval; + + + /* fill the not available vector from the available bm */ + for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) { + if (!test_bit(bit_index, available->bm)) + ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL; + } + + if (ai->max_interval == 1) { + get_row_descriptors(ai); + if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND) + goto alloc_found; + else + goto alloc_not_found; + } + + get_column_descriptors(ai); + + for (interval = 16; interval >= 2; interval>>=1) { + if (interval > ai->max_interval) + continue; + if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND) + goto alloc_found; + } + + /* try row reservation if no column is found */ + get_row_descriptors(ai); + if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND) + goto alloc_found; + else + goto alloc_not_found; + + alloc_found: + bitmap_zero(result->bm, UWB_NUM_MAS); + bitmap_zero(result->unsafe_bm, UWB_NUM_MAS); + /* fill the safe and unsafe bitmaps */ + for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) { + if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE) + set_bit(bit_index, result->bm); + else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE) + set_bit(bit_index, result->unsafe_bm); + } + bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS); + + result->safe = ai->safe_allocated_mases; + result->unsafe = ai->unsafe_allocated_mases; + + kfree(ai); + return UWB_RSV_ALLOC_FOUND; + + alloc_not_found: + kfree(ai); + return UWB_RSV_ALLOC_NOT_FOUND; +} diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c index 46b18eec5026..36bc3158006f 100644 --- a/drivers/uwb/beacon.c +++ b/drivers/uwb/beacon.c @@ -22,19 +22,16 @@ * * FIXME: docs */ - #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/device.h> #include <linux/err.h> #include <linux/kdev_t.h> -#include "uwb-internal.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> +#include "uwb-internal.h" -/** Start Beaconing command structure */ +/* Start Beaconing command structure */ struct uwb_rc_cmd_start_beacon { struct uwb_rccb rccb; __le16 wBPSTOffset; @@ -119,7 +116,6 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset) int result; struct device *dev = &rc->uwb_dev.dev; - mutex_lock(&rc->uwb_dev.mutex); if (channel < 0) channel = -1; if (channel == -1) @@ -128,7 +124,7 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset) /* channel >= 0...dah */ result = uwb_rc_start_beacon(rc, bpst_offset, channel); if (result < 0) - goto out_up; + return result; if (le16_to_cpu(rc->ies->wIELength) > 0) { result = uwb_rc_set_ie(rc, rc->ies); if (result < 0) { @@ -137,19 +133,12 @@ int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset) result = uwb_rc_stop_beacon(rc); channel = -1; bpst_offset = 0; - } else - result = 0; + } } } - if (result < 0) - goto out_up; - rc->beaconing = channel; - - uwb_notify(rc, NULL, uwb_bg_joined(rc) ? UWB_NOTIF_BG_JOIN : UWB_NOTIF_BG_LEAVE); - -out_up: - mutex_unlock(&rc->uwb_dev.mutex); + if (result >= 0) + rc->beaconing = channel; return result; } @@ -168,12 +157,6 @@ out_up: * FIXME: use something faster for search than a list */ -struct uwb_beca uwb_beca = { - .list = LIST_HEAD_INIT(uwb_beca.list), - .mutex = __MUTEX_INITIALIZER(uwb_beca.mutex) -}; - - void uwb_bce_kfree(struct kref *_bce) { struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt); @@ -185,13 +168,11 @@ void uwb_bce_kfree(struct kref *_bce) /* Find a beacon by dev addr in the cache */ static -struct uwb_beca_e *__uwb_beca_find_bydev(const struct uwb_dev_addr *dev_addr) +struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc, + const struct uwb_dev_addr *dev_addr) { struct uwb_beca_e *bce, *next; - list_for_each_entry_safe(bce, next, &uwb_beca.list, node) { - d_printf(6, NULL, "looking for addr %02x:%02x in %02x:%02x\n", - dev_addr->data[0], dev_addr->data[1], - bce->dev_addr.data[0], bce->dev_addr.data[1]); + list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) { if (!memcmp(&bce->dev_addr, dev_addr, sizeof(bce->dev_addr))) goto out; } @@ -202,10 +183,11 @@ out: /* Find a beacon by dev addr in the cache */ static -struct uwb_beca_e *__uwb_beca_find_bymac(const struct uwb_mac_addr *mac_addr) +struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc, + const struct uwb_mac_addr *mac_addr) { struct uwb_beca_e *bce, *next; - list_for_each_entry_safe(bce, next, &uwb_beca.list, node) { + list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) { if (!memcmp(bce->mac_addr, mac_addr->data, sizeof(struct uwb_mac_addr))) goto out; @@ -229,11 +211,11 @@ struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc, struct uwb_dev *found = NULL; struct uwb_beca_e *bce; - mutex_lock(&uwb_beca.mutex); - bce = __uwb_beca_find_bydev(devaddr); + mutex_lock(&rc->uwb_beca.mutex); + bce = __uwb_beca_find_bydev(rc, devaddr); if (bce) found = uwb_dev_try_get(rc, bce->uwb_dev); - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); return found; } @@ -249,11 +231,11 @@ struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc, struct uwb_dev *found = NULL; struct uwb_beca_e *bce; - mutex_lock(&uwb_beca.mutex); - bce = __uwb_beca_find_bymac(macaddr); + mutex_lock(&rc->uwb_beca.mutex); + bce = __uwb_beca_find_bymac(rc, macaddr); if (bce) found = uwb_dev_try_get(rc, bce->uwb_dev); - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); return found; } @@ -274,7 +256,9 @@ static void uwb_beca_e_init(struct uwb_beca_e *bce) * @bf: Beacon frame (part of b, really) * @ts_jiffies: Timestamp (in jiffies) when the beacon was received */ -struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be, +static +struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc, + struct uwb_rc_evt_beacon *be, struct uwb_beacon_frame *bf, unsigned long ts_jiffies) { @@ -286,7 +270,7 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be, uwb_beca_e_init(bce); bce->ts_jiffies = ts_jiffies; bce->uwb_dev = NULL; - list_add(&bce->node, &uwb_beca.list); + list_add(&bce->node, &rc->uwb_beca.list); return bce; } @@ -295,33 +279,32 @@ struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *be, * * Remove associated devicest too. */ -void uwb_beca_purge(void) +void uwb_beca_purge(struct uwb_rc *rc) { struct uwb_beca_e *bce, *next; unsigned long expires; - mutex_lock(&uwb_beca.mutex); - list_for_each_entry_safe(bce, next, &uwb_beca.list, node) { + mutex_lock(&rc->uwb_beca.mutex); + list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) { expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms); if (time_after(jiffies, expires)) { uwbd_dev_offair(bce); - list_del(&bce->node); - uwb_bce_put(bce); } } - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); } /* Clean up the whole beacon cache. Called on shutdown */ -void uwb_beca_release(void) +void uwb_beca_release(struct uwb_rc *rc) { struct uwb_beca_e *bce, *next; - mutex_lock(&uwb_beca.mutex); - list_for_each_entry_safe(bce, next, &uwb_beca.list, node) { + + mutex_lock(&rc->uwb_beca.mutex); + list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) { list_del(&bce->node); uwb_bce_put(bce); } - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); } static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be, @@ -349,22 +332,22 @@ ssize_t uwb_bce_print_IEs(struct uwb_dev *uwb_dev, struct uwb_beca_e *bce, ssize_t result = 0; struct uwb_rc_evt_beacon *be; struct uwb_beacon_frame *bf; - struct uwb_buf_ctx ctx = { - .buf = buf, - .bytes = 0, - .size = size - }; + int ies_len; + struct uwb_ie_hdr *ies; mutex_lock(&bce->mutex); + be = bce->be; - if (be == NULL) - goto out; - bf = (void *) be->BeaconInfo; - uwb_ie_for_each(uwb_dev, uwb_ie_dump_hex, &ctx, - bf->IEData, be->wBeaconInfoLength - sizeof(*bf)); - result = ctx.bytes; -out: + if (be) { + bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo; + ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame); + ies = (struct uwb_ie_hdr *)bf->IEData; + + result = uwb_ie_dump_hex(ies, ies_len, buf, size); + } + mutex_unlock(&bce->mutex); + return result; } @@ -437,18 +420,18 @@ int uwbd_evt_handle_rc_beacon(struct uwb_event *evt) if (uwb_mac_addr_bcast(&bf->Device_Identifier)) return 0; - mutex_lock(&uwb_beca.mutex); - bce = __uwb_beca_find_bymac(&bf->Device_Identifier); + mutex_lock(&rc->uwb_beca.mutex); + bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier); if (bce == NULL) { /* Not in there, a new device is pinging */ uwb_beacon_print(evt->rc, be, bf); - bce = __uwb_beca_add(be, bf, evt->ts_jiffies); + bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies); if (bce == NULL) { - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); return -ENOMEM; } } - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); mutex_lock(&bce->mutex); /* purge old beacon data */ @@ -588,19 +571,6 @@ error: return result; } -/** - * uwb_bg_joined - is the RC in a beacon group? - * @rc: the radio controller - * - * Returns true if the radio controller is in a beacon group (even if - * it's the sole member). - */ -int uwb_bg_joined(struct uwb_rc *rc) -{ - return rc->beaconing != -1; -} -EXPORT_SYMBOL_GPL(uwb_bg_joined); - /* * Print beaconing state. */ @@ -619,9 +589,6 @@ static ssize_t uwb_rc_beacon_show(struct device *dev, /* * Start beaconing on the specified channel, or stop beaconing. - * - * The BPST offset of when to start searching for a beacon group to - * join may be specified. */ static ssize_t uwb_rc_beacon_store(struct device *dev, struct device_attribute *attr, @@ -630,12 +597,11 @@ static ssize_t uwb_rc_beacon_store(struct device *dev, struct uwb_dev *uwb_dev = to_uwb_dev(dev); struct uwb_rc *rc = uwb_dev->rc; int channel; - unsigned bpst_offset = 0; ssize_t result = -EINVAL; - result = sscanf(buf, "%d %u\n", &channel, &bpst_offset); + result = sscanf(buf, "%d", &channel); if (result >= 1) - result = uwb_rc_beacon(rc, channel, bpst_offset); + result = uwb_radio_force_channel(rc, channel); return result < 0 ? result : size; } diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c index 521cdeb84971..da77e41de990 100644 --- a/drivers/uwb/driver.c +++ b/drivers/uwb/driver.c @@ -53,7 +53,7 @@ #include <linux/err.h> #include <linux/kdev_t.h> #include <linux/random.h> -#include <linux/uwb/debug.h> + #include "uwb-internal.h" @@ -118,7 +118,6 @@ static int __init uwb_subsys_init(void) result = class_register(&uwb_rc_class); if (result < 0) goto error_uwb_rc_class_register; - uwbd_start(); uwb_dbg_init(); return 0; @@ -132,7 +131,6 @@ module_init(uwb_subsys_init); static void __exit uwb_subsys_exit(void) { uwb_dbg_exit(); - uwbd_stop(); class_unregister(&uwb_rc_class); uwb_est_destroy(); return; diff --git a/drivers/uwb/drp-avail.c b/drivers/uwb/drp-avail.c index 3febd8552808..40a540a5a72e 100644 --- a/drivers/uwb/drp-avail.c +++ b/drivers/uwb/drp-avail.c @@ -58,7 +58,7 @@ void uwb_drp_avail_init(struct uwb_rc *rc) * * avail = global & local & pending */ -static void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail) +void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail) { bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS); bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS); @@ -105,6 +105,7 @@ void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas) bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS); bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS); rc->drp_avail.ie_valid = false; + uwb_rsv_handle_drp_avail_change(rc); } /** @@ -280,6 +281,7 @@ int uwbd_evt_handle_rc_drp_avail(struct uwb_event *evt) mutex_lock(&rc->rsvs_mutex); bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS); rc->drp_avail.ie_valid = false; + uwb_rsv_handle_drp_avail_change(rc); mutex_unlock(&rc->rsvs_mutex); uwb_rsv_sched_update(rc); diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c index 882724c5f126..2840d7bf9e67 100644 --- a/drivers/uwb/drp-ie.c +++ b/drivers/uwb/drp-ie.c @@ -16,13 +16,102 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/random.h> #include <linux/uwb.h> #include "uwb-internal.h" + +/* + * Return the reason code for a reservations's DRP IE. + */ +int uwb_rsv_reason_code(struct uwb_rsv *rsv) +{ + static const int reason_codes[] = { + [UWB_RSV_STATE_O_INITIATED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_O_PENDING] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_O_MODIFIED] = UWB_DRP_REASON_MODIFIED, + [UWB_RSV_STATE_O_ESTABLISHED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_O_TO_BE_MOVED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_O_MOVE_COMBINING] = UWB_DRP_REASON_MODIFIED, + [UWB_RSV_STATE_O_MOVE_REDUCING] = UWB_DRP_REASON_MODIFIED, + [UWB_RSV_STATE_O_MOVE_EXPANDING] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_ACCEPTED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_CONFLICT] = UWB_DRP_REASON_CONFLICT, + [UWB_RSV_STATE_T_PENDING] = UWB_DRP_REASON_PENDING, + [UWB_RSV_STATE_T_DENIED] = UWB_DRP_REASON_DENIED, + [UWB_RSV_STATE_T_RESIZED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT, + [UWB_RSV_STATE_T_EXPANDING_PENDING] = UWB_DRP_REASON_PENDING, + [UWB_RSV_STATE_T_EXPANDING_DENIED] = UWB_DRP_REASON_DENIED, + }; + + return reason_codes[rsv->state]; +} + +/* + * Return the reason code for a reservations's companion DRP IE . + */ +int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv) +{ + static const int companion_reason_codes[] = { + [UWB_RSV_STATE_O_MOVE_EXPANDING] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED, + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT, + [UWB_RSV_STATE_T_EXPANDING_PENDING] = UWB_DRP_REASON_PENDING, + [UWB_RSV_STATE_T_EXPANDING_DENIED] = UWB_DRP_REASON_DENIED, + }; + + return companion_reason_codes[rsv->state]; +} + +/* + * Return the status bit for a reservations's DRP IE. + */ +int uwb_rsv_status(struct uwb_rsv *rsv) +{ + static const int statuses[] = { + [UWB_RSV_STATE_O_INITIATED] = 0, + [UWB_RSV_STATE_O_PENDING] = 0, + [UWB_RSV_STATE_O_MODIFIED] = 1, + [UWB_RSV_STATE_O_ESTABLISHED] = 1, + [UWB_RSV_STATE_O_TO_BE_MOVED] = 0, + [UWB_RSV_STATE_O_MOVE_COMBINING] = 1, + [UWB_RSV_STATE_O_MOVE_REDUCING] = 1, + [UWB_RSV_STATE_O_MOVE_EXPANDING] = 1, + [UWB_RSV_STATE_T_ACCEPTED] = 1, + [UWB_RSV_STATE_T_CONFLICT] = 0, + [UWB_RSV_STATE_T_PENDING] = 0, + [UWB_RSV_STATE_T_DENIED] = 0, + [UWB_RSV_STATE_T_RESIZED] = 1, + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1, + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1, + [UWB_RSV_STATE_T_EXPANDING_PENDING] = 1, + [UWB_RSV_STATE_T_EXPANDING_DENIED] = 1, + + }; + + return statuses[rsv->state]; +} + +/* + * Return the status bit for a reservations's companion DRP IE . + */ +int uwb_rsv_companion_status(struct uwb_rsv *rsv) +{ + static const int companion_statuses[] = { + [UWB_RSV_STATE_O_MOVE_EXPANDING] = 0, + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1, + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0, + [UWB_RSV_STATE_T_EXPANDING_PENDING] = 0, + [UWB_RSV_STATE_T_EXPANDING_DENIED] = 0, + }; + + return companion_statuses[rsv->state]; +} + /* * Allocate a DRP IE. * @@ -34,16 +123,12 @@ static struct uwb_ie_drp *uwb_drp_ie_alloc(void) { struct uwb_ie_drp *drp_ie; - unsigned tiebreaker; drp_ie = kzalloc(sizeof(struct uwb_ie_drp) + UWB_NUM_ZONES * sizeof(struct uwb_drp_alloc), GFP_KERNEL); if (drp_ie) { drp_ie->hdr.element_id = UWB_IE_DRP; - - get_random_bytes(&tiebreaker, sizeof(unsigned)); - uwb_ie_drp_set_tiebreaker(drp_ie, tiebreaker & 1); } return drp_ie; } @@ -104,43 +189,17 @@ static void uwb_drp_ie_from_bm(struct uwb_ie_drp *drp_ie, */ int uwb_drp_ie_update(struct uwb_rsv *rsv) { - struct device *dev = &rsv->rc->uwb_dev.dev; struct uwb_ie_drp *drp_ie; - int reason_code, status; + struct uwb_rsv_move *mv; + int unsafe; - switch (rsv->state) { - case UWB_RSV_STATE_NONE: + if (rsv->state == UWB_RSV_STATE_NONE) { kfree(rsv->drp_ie); rsv->drp_ie = NULL; return 0; - case UWB_RSV_STATE_O_INITIATED: - reason_code = UWB_DRP_REASON_ACCEPTED; - status = 0; - break; - case UWB_RSV_STATE_O_PENDING: - reason_code = UWB_DRP_REASON_ACCEPTED; - status = 0; - break; - case UWB_RSV_STATE_O_MODIFIED: - reason_code = UWB_DRP_REASON_MODIFIED; - status = 1; - break; - case UWB_RSV_STATE_O_ESTABLISHED: - reason_code = UWB_DRP_REASON_ACCEPTED; - status = 1; - break; - case UWB_RSV_STATE_T_ACCEPTED: - reason_code = UWB_DRP_REASON_ACCEPTED; - status = 1; - break; - case UWB_RSV_STATE_T_DENIED: - reason_code = UWB_DRP_REASON_DENIED; - status = 0; - break; - default: - dev_dbg(dev, "rsv with unhandled state (%d)\n", rsv->state); - return -EINVAL; } + + unsafe = rsv->mas.unsafe ? 1 : 0; if (rsv->drp_ie == NULL) { rsv->drp_ie = uwb_drp_ie_alloc(); @@ -149,9 +208,11 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv) } drp_ie = rsv->drp_ie; + uwb_ie_drp_set_unsafe(drp_ie, unsafe); + uwb_ie_drp_set_tiebreaker(drp_ie, rsv->tiebreaker); uwb_ie_drp_set_owner(drp_ie, uwb_rsv_is_owner(rsv)); - uwb_ie_drp_set_status(drp_ie, status); - uwb_ie_drp_set_reason_code(drp_ie, reason_code); + uwb_ie_drp_set_status(drp_ie, uwb_rsv_status(rsv)); + uwb_ie_drp_set_reason_code(drp_ie, uwb_rsv_reason_code(rsv)); uwb_ie_drp_set_stream_index(drp_ie, rsv->stream); uwb_ie_drp_set_type(drp_ie, rsv->type); @@ -169,6 +230,27 @@ int uwb_drp_ie_update(struct uwb_rsv *rsv) uwb_drp_ie_from_bm(drp_ie, &rsv->mas); + if (uwb_rsv_has_two_drp_ies(rsv)) { + mv = &rsv->mv; + if (mv->companion_drp_ie == NULL) { + mv->companion_drp_ie = uwb_drp_ie_alloc(); + if (mv->companion_drp_ie == NULL) + return -ENOMEM; + } + drp_ie = mv->companion_drp_ie; + + /* keep all the same configuration of the main drp_ie */ + memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp)); + + + /* FIXME: handle properly the unsafe bit */ + uwb_ie_drp_set_unsafe(drp_ie, 1); + uwb_ie_drp_set_status(drp_ie, uwb_rsv_companion_status(rsv)); + uwb_ie_drp_set_reason_code(drp_ie, uwb_rsv_companion_reason_code(rsv)); + + uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas); + } + rsv->ie_valid = true; return 0; } @@ -219,6 +301,8 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie) u8 zone; u16 zone_mask; + bitmap_zero(bm->bm, UWB_NUM_MAS); + for (cnt = 0; cnt < numallocs; cnt++) { alloc = &drp_ie->allocs[cnt]; zone_bm = le16_to_cpu(alloc->zone_bm); @@ -230,3 +314,4 @@ void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie) } } } + diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c index c0b1e5e2bd08..2b4f9406789d 100644 --- a/drivers/uwb/drp.c +++ b/drivers/uwb/drp.c @@ -23,6 +23,59 @@ #include <linux/delay.h> #include "uwb-internal.h" + +/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */ +enum uwb_drp_conflict_action { + /* Reservation is mantained, no action needed */ + UWB_DRP_CONFLICT_MANTAIN = 0, + + /* the device shall not transmit frames in conflicting MASs in + * the following superframe. If the device is the reservation + * target, it shall also set the Reason Code in its DRP IE to + * Conflict in its beacon in the following superframe. + */ + UWB_DRP_CONFLICT_ACT1, + + /* the device shall not set the Reservation Status bit to ONE + * and shall not transmit frames in conflicting MASs. If the + * device is the reservation target, it shall also set the + * Reason Code in its DRP IE to Conflict. + */ + UWB_DRP_CONFLICT_ACT2, + + /* the device shall not transmit frames in conflicting MASs in + * the following superframe. It shall remove the conflicting + * MASs from the reservation or set the Reservation Status to + * ZERO in its beacon in the following superframe. If the + * device is the reservation target, it shall also set the + * Reason Code in its DRP IE to Conflict. + */ + UWB_DRP_CONFLICT_ACT3, +}; + + +static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg, + struct uwb_rceb *reply, ssize_t reply_size) +{ + struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply; + + if (r != NULL) { + if (r->bResultCode != UWB_RC_RES_SUCCESS) + dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n", + uwb_rc_strerror(r->bResultCode), r->bResultCode); + } else + dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n"); + + spin_lock(&rc->rsvs_lock); + if (rc->set_drp_ie_pending > 1) { + rc->set_drp_ie_pending = 0; + uwb_rsv_queue_update(rc); + } else { + rc->set_drp_ie_pending = 0; + } + spin_unlock(&rc->rsvs_lock); +} + /** * Construct and send the SET DRP IE * @@ -37,28 +90,32 @@ * * A DRP Availability IE is appended. * - * rc->uwb_dev.mutex is held + * rc->rsvs_mutex is held * * FIXME We currently ignore the returned value indicating the remaining space * in beacon. This could be used to deny reservation requests earlier if * determined that they would cause the beacon space to be exceeded. */ -static -int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc) +int uwb_rc_send_all_drp_ie(struct uwb_rc *rc) { int result; - struct device *dev = &rc->uwb_dev.dev; struct uwb_rc_cmd_set_drp_ie *cmd; - struct uwb_rc_evt_set_drp_ie reply; struct uwb_rsv *rsv; + struct uwb_rsv_move *mv; int num_bytes = 0; u8 *IEDataptr; result = -ENOMEM; /* First traverse all reservations to determine memory needed. */ list_for_each_entry(rsv, &rc->reservations, rc_node) { - if (rsv->drp_ie != NULL) + if (rsv->drp_ie != NULL) { num_bytes += rsv->drp_ie->hdr.length + 2; + if (uwb_rsv_has_two_drp_ies(rsv) && + (rsv->mv.companion_drp_ie != NULL)) { + mv = &rsv->mv; + num_bytes += mv->companion_drp_ie->hdr.length + 2; + } + } } num_bytes += sizeof(rc->drp_avail.ie); cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL); @@ -69,128 +126,322 @@ int uwb_rc_gen_send_drp_ie(struct uwb_rc *rc) cmd->wIELength = num_bytes; IEDataptr = (u8 *)&cmd->IEData[0]; + /* FIXME: DRV avail IE is not always needed */ + /* put DRP avail IE first */ + memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie)); + IEDataptr += sizeof(struct uwb_ie_drp_avail); + /* Next traverse all reservations to place IEs in allocated memory. */ list_for_each_entry(rsv, &rc->reservations, rc_node) { if (rsv->drp_ie != NULL) { memcpy(IEDataptr, rsv->drp_ie, rsv->drp_ie->hdr.length + 2); IEDataptr += rsv->drp_ie->hdr.length + 2; + + if (uwb_rsv_has_two_drp_ies(rsv) && + (rsv->mv.companion_drp_ie != NULL)) { + mv = &rsv->mv; + memcpy(IEDataptr, mv->companion_drp_ie, + mv->companion_drp_ie->hdr.length + 2); + IEDataptr += mv->companion_drp_ie->hdr.length + 2; + } } } - memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie)); - reply.rceb.bEventType = UWB_RC_CET_GENERAL; - reply.rceb.wEvent = UWB_RC_CMD_SET_DRP_IE; - result = uwb_rc_cmd(rc, "SET-DRP-IE", &cmd->rccb, - sizeof(*cmd) + num_bytes, &reply.rceb, - sizeof(reply)); - if (result < 0) - goto error_cmd; - result = le16_to_cpu(reply.wRemainingSpace); - if (reply.bResultCode != UWB_RC_RES_SUCCESS) { - dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: command execution " - "failed: %s (%d). RemainingSpace in beacon " - "= %d\n", uwb_rc_strerror(reply.bResultCode), - reply.bResultCode, result); - result = -EIO; - } else { - dev_dbg(dev, "SET-DRP-IE sent. RemainingSpace in beacon " - "= %d.\n", result); - result = 0; - } -error_cmd: + result = uwb_rc_cmd_async(rc, "SET-DRP-IE", &cmd->rccb, sizeof(*cmd) + num_bytes, + UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE, + uwb_rc_set_drp_cmd_done, NULL); + + rc->set_drp_ie_pending = 1; + kfree(cmd); error: return result; - } -/** - * Send all DRP IEs associated with this host - * - * @returns: >= 0 number of bytes still available in the beacon - * < 0 errno code on error. + +/* + * Evaluate the action to perform using conflict resolution rules * - * As per the protocol we obtain the host controller device lock to access - * bandwidth structures. + * Return a uwb_drp_conflict_action. */ -int uwb_rc_send_all_drp_ie(struct uwb_rc *rc) +static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot, + struct uwb_rsv *rsv, int our_status) { - int result; + int our_tie_breaker = rsv->tiebreaker; + int our_type = rsv->type; + int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot; + + int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie); + int ext_status = uwb_ie_drp_status(ext_drp_ie); + int ext_type = uwb_ie_drp_type(ext_drp_ie); + + + /* [ECMA-368 2nd Edition] 17.4.6 */ + if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) { + return UWB_DRP_CONFLICT_MANTAIN; + } - mutex_lock(&rc->uwb_dev.mutex); - result = uwb_rc_gen_send_drp_ie(rc); - mutex_unlock(&rc->uwb_dev.mutex); - return result; + /* [ECMA-368 2nd Edition] 17.4.6-1 */ + if (our_type == UWB_DRP_TYPE_ALIEN_BP) { + return UWB_DRP_CONFLICT_MANTAIN; + } + + /* [ECMA-368 2nd Edition] 17.4.6-2 */ + if (ext_type == UWB_DRP_TYPE_ALIEN_BP) { + /* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */ + return UWB_DRP_CONFLICT_ACT1; + } + + /* [ECMA-368 2nd Edition] 17.4.6-3 */ + if (our_status == 0 && ext_status == 1) { + return UWB_DRP_CONFLICT_ACT2; + } + + /* [ECMA-368 2nd Edition] 17.4.6-4 */ + if (our_status == 1 && ext_status == 0) { + return UWB_DRP_CONFLICT_MANTAIN; + } + + /* [ECMA-368 2nd Edition] 17.4.6-5a */ + if (our_tie_breaker == ext_tie_breaker && + our_beacon_slot < ext_beacon_slot) { + return UWB_DRP_CONFLICT_MANTAIN; + } + + /* [ECMA-368 2nd Edition] 17.4.6-5b */ + if (our_tie_breaker != ext_tie_breaker && + our_beacon_slot > ext_beacon_slot) { + return UWB_DRP_CONFLICT_MANTAIN; + } + + if (our_status == 0) { + if (our_tie_breaker == ext_tie_breaker) { + /* [ECMA-368 2nd Edition] 17.4.6-6a */ + if (our_beacon_slot > ext_beacon_slot) { + return UWB_DRP_CONFLICT_ACT2; + } + } else { + /* [ECMA-368 2nd Edition] 17.4.6-6b */ + if (our_beacon_slot < ext_beacon_slot) { + return UWB_DRP_CONFLICT_ACT2; + } + } + } else { + if (our_tie_breaker == ext_tie_breaker) { + /* [ECMA-368 2nd Edition] 17.4.6-7a */ + if (our_beacon_slot > ext_beacon_slot) { + return UWB_DRP_CONFLICT_ACT3; + } + } else { + /* [ECMA-368 2nd Edition] 17.4.6-7b */ + if (our_beacon_slot < ext_beacon_slot) { + return UWB_DRP_CONFLICT_ACT3; + } + } + } + return UWB_DRP_CONFLICT_MANTAIN; } -void uwb_drp_handle_timeout(struct uwb_rsv *rsv) +static void handle_conflict_normal(struct uwb_ie_drp *drp_ie, + int ext_beacon_slot, + struct uwb_rsv *rsv, + struct uwb_mas_bm *conflicting_mas) { - struct device *dev = &rsv->rc->uwb_dev.dev; + struct uwb_rc *rc = rsv->rc; + struct uwb_rsv_move *mv = &rsv->mv; + struct uwb_drp_backoff_win *bow = &rc->bow; + int action; + + action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv)); + + if (uwb_rsv_is_owner(rsv)) { + switch(action) { + case UWB_DRP_CONFLICT_ACT2: + /* try move */ + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED); + if (bow->can_reserve_extra_mases == false) + uwb_rsv_backoff_win_increment(rc); + + break; + case UWB_DRP_CONFLICT_ACT3: + uwb_rsv_backoff_win_increment(rc); + /* drop some mases with reason modified */ + /* put in the companion the mases to be dropped */ + bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS); + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED); + default: + break; + } + } else { + switch(action) { + case UWB_DRP_CONFLICT_ACT2: + case UWB_DRP_CONFLICT_ACT3: + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT); + default: + break; + } - dev_dbg(dev, "reservation timeout in state %s (%d)\n", - uwb_rsv_state_str(rsv->state), rsv->state); + } + +} - switch (rsv->state) { - case UWB_RSV_STATE_O_INITIATED: - if (rsv->is_multicast) { - uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); - return; +static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot, + struct uwb_rsv *rsv, bool companion_only, + struct uwb_mas_bm *conflicting_mas) +{ + struct uwb_rc *rc = rsv->rc; + struct uwb_drp_backoff_win *bow = &rc->bow; + struct uwb_rsv_move *mv = &rsv->mv; + int action; + + if (companion_only) { + /* status of companion is 0 at this point */ + action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0); + if (uwb_rsv_is_owner(rsv)) { + switch(action) { + case UWB_DRP_CONFLICT_ACT2: + case UWB_DRP_CONFLICT_ACT3: + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + rsv->needs_release_companion_mas = false; + if (bow->can_reserve_extra_mases == false) + uwb_rsv_backoff_win_increment(rc); + uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas); + } + } else { /* rsv is target */ + switch(action) { + case UWB_DRP_CONFLICT_ACT2: + case UWB_DRP_CONFLICT_ACT3: + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_CONFLICT); + /* send_drp_avail_ie = true; */ + } } - break; - case UWB_RSV_STATE_O_ESTABLISHED: - if (rsv->is_multicast) - return; - break; - default: - break; + } else { /* also base part of the reservation is conflicting */ + if (uwb_rsv_is_owner(rsv)) { + uwb_rsv_backoff_win_increment(rc); + /* remove companion part */ + uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas); + + /* drop some mases with reason modified */ + + /* put in the companion the mases to be dropped */ + bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS); + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED); + } else { /* it is a target rsv */ + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT); + /* send_drp_avail_ie = true; */ + } + } +} + +static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv, + struct uwb_rc_evt_drp *drp_evt, + struct uwb_ie_drp *drp_ie, + struct uwb_mas_bm *conflicting_mas) +{ + struct uwb_rsv_move *mv; + + /* check if the conflicting reservation has two drp_ies */ + if (uwb_rsv_has_two_drp_ies(rsv)) { + mv = &rsv->mv; + if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) { + handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number, + rsv, false, conflicting_mas); + } else { + if (bitmap_intersects(mv->companion_mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) { + handle_conflict_expanding(drp_ie, drp_evt->beacon_slot_number, + rsv, true, conflicting_mas); + } + } + } else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS)) { + handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number, rsv, conflicting_mas); } - uwb_rsv_remove(rsv); } +static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc, + struct uwb_rc_evt_drp *drp_evt, + struct uwb_ie_drp *drp_ie, + struct uwb_mas_bm *conflicting_mas) +{ + struct uwb_rsv *rsv; + + list_for_each_entry(rsv, &rc->reservations, rc_node) { + uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, conflicting_mas); + } +} + /* * Based on the DRP IE, transition a target reservation to a new * state. */ static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv, - struct uwb_ie_drp *drp_ie) + struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt) { struct device *dev = &rc->uwb_dev.dev; + struct uwb_rsv_move *mv = &rsv->mv; int status; enum uwb_drp_reason reason_code; - + struct uwb_mas_bm mas; + status = uwb_ie_drp_status(drp_ie); reason_code = uwb_ie_drp_reason_code(drp_ie); + uwb_drp_ie_to_bm(&mas, drp_ie); - if (status) { - switch (reason_code) { - case UWB_DRP_REASON_ACCEPTED: - uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED); - break; - case UWB_DRP_REASON_MODIFIED: - dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n", - reason_code, status); + switch (reason_code) { + case UWB_DRP_REASON_ACCEPTED: + + if (rsv->state == UWB_RSV_STATE_T_CONFLICT) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT); break; - default: - dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n", - reason_code, status); } - } else { - switch (reason_code) { - case UWB_DRP_REASON_ACCEPTED: - /* New reservations are handled in uwb_rsv_find(). */ - break; - case UWB_DRP_REASON_DENIED: - uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); - break; - case UWB_DRP_REASON_CONFLICT: - case UWB_DRP_REASON_MODIFIED: - dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n", - reason_code, status); + + if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) { + /* drp_ie is companion */ + if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) + /* stroke companion */ + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED); + } else { + if (!bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) { + if (uwb_drp_avail_reserve_pending(rc, &mas) == -EBUSY) { + /* FIXME: there is a conflict, find + * the conflicting reservations and + * take a sensible action. Consider + * that in drp_ie there is the + * "neighbour" */ + uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas); + } else { + /* accept the extra reservation */ + bitmap_copy(mv->companion_mas.bm, mas.bm, UWB_NUM_MAS); + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED); + } + } else { + if (status) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED); + } + } + + } + break; + + case UWB_DRP_REASON_MODIFIED: + /* check to see if we have already modified the reservation */ + if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED); break; - default: - dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n", - reason_code, status); } + + /* find if the owner wants to expand or reduce */ + if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) { + /* owner is reducing */ + bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm, UWB_NUM_MAS); + uwb_drp_avail_release(rsv->rc, &mv->companion_mas); + } + + bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS); + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED); + break; + default: + dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n", + reason_code, status); } } @@ -199,23 +450,60 @@ static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv, * state. */ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv, - struct uwb_ie_drp *drp_ie) + struct uwb_dev *src, struct uwb_ie_drp *drp_ie, + struct uwb_rc_evt_drp *drp_evt) { struct device *dev = &rc->uwb_dev.dev; + struct uwb_rsv_move *mv = &rsv->mv; int status; enum uwb_drp_reason reason_code; + struct uwb_mas_bm mas; status = uwb_ie_drp_status(drp_ie); reason_code = uwb_ie_drp_reason_code(drp_ie); + uwb_drp_ie_to_bm(&mas, drp_ie); if (status) { switch (reason_code) { case UWB_DRP_REASON_ACCEPTED: - uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); - break; - case UWB_DRP_REASON_MODIFIED: - dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n", - reason_code, status); + switch (rsv->state) { + case UWB_RSV_STATE_O_PENDING: + case UWB_RSV_STATE_O_INITIATED: + case UWB_RSV_STATE_O_ESTABLISHED: + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + break; + case UWB_RSV_STATE_O_MODIFIED: + if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + } else { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED); + } + break; + + case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn' t be a problem */ + if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + } else { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING); + } + break; + case UWB_RSV_STATE_O_MOVE_EXPANDING: + if (bitmap_equal(mas.bm, mv->companion_mas.bm, UWB_NUM_MAS)) { + /* Companion reservation accepted */ + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING); + } else { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING); + } + break; + case UWB_RSV_STATE_O_MOVE_COMBINING: + if (bitmap_equal(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING); + else + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING); + break; + default: + break; + } break; default: dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n", @@ -230,9 +518,10 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv, uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); break; case UWB_DRP_REASON_CONFLICT: - case UWB_DRP_REASON_MODIFIED: - dev_err(dev, "FIXME: unhandled reason code (%d/%d)\n", - reason_code, status); + /* resolve the conflict */ + bitmap_complement(mas.bm, src->last_availability_bm, + UWB_NUM_MAS); + uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas); break; default: dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n", @@ -241,12 +530,110 @@ static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv, } } +static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt) +{ + unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US; + mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us)); +} + +static void uwb_cnflt_update_work(struct work_struct *work) +{ + struct uwb_cnflt_alien *cnflt = container_of(work, + struct uwb_cnflt_alien, + cnflt_update_work); + struct uwb_cnflt_alien *c; + struct uwb_rc *rc = cnflt->rc; + + unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE; + + mutex_lock(&rc->rsvs_mutex); + + list_del(&cnflt->rc_node); + + /* update rc global conflicting alien bitmap */ + bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS); + + list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) { + bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, c->mas.bm, UWB_NUM_MAS); + } + + queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us)); + + kfree(cnflt); + mutex_unlock(&rc->rsvs_mutex); +} + +static void uwb_cnflt_timer(unsigned long arg) +{ + struct uwb_cnflt_alien *cnflt = (struct uwb_cnflt_alien *)arg; + + queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work); +} + /* - * Process a received DRP IE, it's either for a reservation owned by - * the RC or targeted at it (or it's for a WUSB cluster reservation). + * We have received an DRP_IE of type Alien BP and we need to make + * sure we do not transmit in conflicting MASs. */ -static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src, - struct uwb_ie_drp *drp_ie) +static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie) +{ + struct device *dev = &rc->uwb_dev.dev; + struct uwb_mas_bm mas; + struct uwb_cnflt_alien *cnflt; + char buf[72]; + unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE; + + uwb_drp_ie_to_bm(&mas, drp_ie); + bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS); + + list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) { + if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) { + /* Existing alien BP reservation conflicting + * bitmap, just reset the timer */ + uwb_cnflt_alien_stroke_timer(cnflt); + return; + } + } + + /* New alien BP reservation conflicting bitmap */ + + /* alloc and initialize new uwb_cnflt_alien */ + cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL); + if (!cnflt) + dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n"); + INIT_LIST_HEAD(&cnflt->rc_node); + init_timer(&cnflt->timer); + cnflt->timer.function = uwb_cnflt_timer; + cnflt->timer.data = (unsigned long)cnflt; + + cnflt->rc = rc; + INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work); + + bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS); + + list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list); + + /* update rc global conflicting alien bitmap */ + bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS); + + queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us)); + + /* start the timer */ + uwb_cnflt_alien_stroke_timer(cnflt); +} + +static void uwb_drp_process_not_involved(struct uwb_rc *rc, + struct uwb_rc_evt_drp *drp_evt, + struct uwb_ie_drp *drp_ie) +{ + struct uwb_mas_bm mas; + + uwb_drp_ie_to_bm(&mas, drp_ie); + uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas); +} + +static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src, + struct uwb_rc_evt_drp *drp_evt, + struct uwb_ie_drp *drp_ie) { struct uwb_rsv *rsv; @@ -259,7 +646,7 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src, */ return; } - + /* * Do nothing with DRP IEs for reservations that have been * terminated. @@ -268,13 +655,43 @@ static void uwb_drp_process(struct uwb_rc *rc, struct uwb_dev *src, uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); return; } - + if (uwb_ie_drp_owner(drp_ie)) - uwb_drp_process_target(rc, rsv, drp_ie); + uwb_drp_process_target(rc, rsv, drp_ie, drp_evt); + else + uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt); + +} + + +static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie) +{ + return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0; +} + +/* + * Process a received DRP IE. + */ +static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt, + struct uwb_dev *src, struct uwb_ie_drp *drp_ie) +{ + if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP) + uwb_drp_handle_alien_drp(rc, drp_ie); + else if (uwb_drp_involves_us(rc, drp_ie)) + uwb_drp_process_involved(rc, src, drp_evt, drp_ie); else - uwb_drp_process_owner(rc, rsv, drp_ie); + uwb_drp_process_not_involved(rc, drp_evt, drp_ie); } +/* + * Process a received DRP Availability IE + */ +static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src, + struct uwb_ie_drp_avail *drp_availability_ie) +{ + bitmap_copy(src->last_availability_bm, + drp_availability_ie->bmp, UWB_NUM_MAS); +} /* * Process all the DRP IEs (both DRP IEs and the DRP Availability IE) @@ -296,10 +713,10 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt, switch (ie_hdr->element_id) { case UWB_IE_DRP_AVAILABILITY: - /* FIXME: does something need to be done with this? */ + uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr); break; case UWB_IE_DRP: - uwb_drp_process(rc, src_dev, (struct uwb_ie_drp *)ie_hdr); + uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr); break; default: dev_warn(dev, "unexpected IE in DRP notification\n"); @@ -312,55 +729,6 @@ void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt, (int)ielen); } - -/* - * Go through all the DRP IEs and find the ones that conflict with our - * reservations. - * - * FIXME: must resolve the conflict according the the rules in - * [ECMA-368]. - */ -static -void uwb_drp_process_conflict_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt, - size_t ielen, struct uwb_dev *src_dev) -{ - struct device *dev = &rc->uwb_dev.dev; - struct uwb_ie_hdr *ie_hdr; - struct uwb_ie_drp *drp_ie; - void *ptr; - - ptr = drp_evt->ie_data; - for (;;) { - ie_hdr = uwb_ie_next(&ptr, &ielen); - if (!ie_hdr) - break; - - drp_ie = container_of(ie_hdr, struct uwb_ie_drp, hdr); - - /* FIXME: check if this DRP IE conflicts. */ - } - - if (ielen > 0) - dev_warn(dev, "%d octets remaining in DRP notification\n", - (int)ielen); -} - - -/* - * Terminate all reservations owned by, or targeted at, 'uwb_dev'. - */ -static void uwb_drp_terminate_all(struct uwb_rc *rc, struct uwb_dev *uwb_dev) -{ - struct uwb_rsv *rsv; - - list_for_each_entry(rsv, &rc->reservations, rc_node) { - if (rsv->owner == uwb_dev - || (rsv->target.type == UWB_RSV_TARGET_DEV && rsv->target.dev == uwb_dev)) - uwb_rsv_remove(rsv); - } -} - - /** * uwbd_evt_handle_rc_drp - handle a DRP_IE event * @evt: the DRP_IE event from the radio controller @@ -401,7 +769,6 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt) size_t ielength, bytes_left; struct uwb_dev_addr src_addr; struct uwb_dev *src_dev; - int reason; /* Is there enough data to decode the event (and any IEs in its payload)? */ @@ -437,22 +804,8 @@ int uwbd_evt_handle_rc_drp(struct uwb_event *evt) mutex_lock(&rc->rsvs_mutex); - reason = uwb_rc_evt_drp_reason(drp_evt); - - switch (reason) { - case UWB_DRP_NOTIF_DRP_IE_RCVD: - uwb_drp_process_all(rc, drp_evt, ielength, src_dev); - break; - case UWB_DRP_NOTIF_CONFLICT: - uwb_drp_process_conflict_all(rc, drp_evt, ielength, src_dev); - break; - case UWB_DRP_NOTIF_TERMINATE: - uwb_drp_terminate_all(rc, src_dev); - break; - default: - dev_warn(dev, "ignored DRP event with reason code: %d\n", reason); - break; - } + /* We do not distinguish from the reason */ + uwb_drp_process_all(rc, drp_evt, ielength, src_dev); mutex_unlock(&rc->rsvs_mutex); diff --git a/drivers/uwb/est.c b/drivers/uwb/est.c index 5fe566b7c845..328fcc2b6099 100644 --- a/drivers/uwb/est.c +++ b/drivers/uwb/est.c @@ -40,10 +40,8 @@ * uwb_est_get_size() */ #include <linux/spinlock.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> -#include "uwb-internal.h" +#include "uwb-internal.h" struct uwb_est { u16 type_event_high; @@ -52,7 +50,6 @@ struct uwb_est { const struct uwb_est_entry *entry; }; - static struct uwb_est *uwb_est; static u8 uwb_est_size; static u8 uwb_est_used; @@ -440,21 +437,12 @@ ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb, u8 *ptr = (u8 *) rceb; read_lock_irqsave(&uwb_est_lock, flags); - d_printf(2, dev, "Size query for event 0x%02x/%04x/%02x," - " buffer size %ld\n", - (unsigned) rceb->bEventType, - (unsigned) le16_to_cpu(rceb->wEvent), - (unsigned) rceb->bEventContext, - (long) rceb_size); size = -ENOSPC; if (rceb_size < sizeof(*rceb)) goto out; event = le16_to_cpu(rceb->wEvent); type_event_high = rceb->bEventType << 8 | (event & 0xff00) >> 8; for (itr = 0; itr < uwb_est_used; itr++) { - d_printf(3, dev, "Checking EST 0x%04x/%04x/%04x\n", - uwb_est[itr].type_event_high, uwb_est[itr].vendor, - uwb_est[itr].product); if (uwb_est[itr].type_event_high != type_event_high) continue; size = uwb_est_get_size(rc, &uwb_est[itr], diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 3d26fa0f8ae1..559f8784acf3 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -51,16 +51,14 @@ * * */ -#include <linux/version.h> #include <linux/init.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/wusb.h> #include <linux/usb/wusb-wa.h> #include <linux/uwb.h> + #include "uwb-internal.h" -#define D_LOCAL 1 -#include <linux/uwb/debug.h> /* The device uses commands and events from the WHCI specification, although * reporting itself as WUSB compliant. */ @@ -631,17 +629,13 @@ void hwarc_neep_cb(struct urb *urb) switch (result = urb->status) { case 0: - d_printf(3, dev, "NEEP: receive stat %d, %zu bytes\n", - urb->status, (size_t)urb->actual_length); uwb_rc_neh_grok(hwarc->uwb_rc, urb->transfer_buffer, urb->actual_length); break; case -ECONNRESET: /* Not an error, but a controlled situation; */ case -ENOENT: /* (we killed the URB)...so, no broadcast */ - d_printf(2, dev, "NEEP: URB reset/noent %d\n", urb->status); goto out; case -ESHUTDOWN: /* going away! */ - d_printf(2, dev, "NEEP: URB down %d\n", urb->status); goto out; default: /* On general errors, retry unless it gets ugly */ if (edc_inc(&hwarc->neep_edc, EDC_MAX_ERRORS, @@ -650,7 +644,6 @@ void hwarc_neep_cb(struct urb *urb) dev_err(dev, "NEEP: URB error %d\n", urb->status); } result = usb_submit_urb(urb, GFP_ATOMIC); - d_printf(3, dev, "NEEP: submit %d\n", result); if (result < 0) { dev_err(dev, "NEEP: Can't resubmit URB (%d) resetting device\n", result); @@ -759,11 +752,11 @@ static int hwarc_get_version(struct uwb_rc *rc) itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength); while (itr_size >= sizeof(*hdr)) { hdr = (struct usb_descriptor_header *) itr; - d_printf(3, dev, "Extra device descriptor: " - "type %02x/%u bytes @ %zu (%zu left)\n", - hdr->bDescriptorType, hdr->bLength, - (itr - usb_dev->rawdescriptors[actconfig_idx]), - itr_size); + dev_dbg(dev, "Extra device descriptor: " + "type %02x/%u bytes @ %zu (%zu left)\n", + hdr->bDescriptorType, hdr->bLength, + (itr - usb_dev->rawdescriptors[actconfig_idx]), + itr_size); if (hdr->bDescriptorType == USB_DT_CS_RADIO_CONTROL) goto found; itr += hdr->bLength; @@ -795,8 +788,7 @@ found: goto error; } rc->version = version; - d_printf(3, dev, "Device supports WUSB protocol version 0x%04x \n", - rc->version); + dev_dbg(dev, "Device supports WUSB protocol version 0x%04x \n", rc->version); result = 0; error: return result; @@ -877,11 +869,28 @@ static void hwarc_disconnect(struct usb_interface *iface) uwb_rc_rm(uwb_rc); usb_put_intf(hwarc->usb_iface); usb_put_dev(hwarc->usb_dev); - d_printf(1, &hwarc->usb_iface->dev, "freed hwarc %p\n", hwarc); kfree(hwarc); uwb_rc_put(uwb_rc); /* when creating the device, refcount = 1 */ } +static int hwarc_pre_reset(struct usb_interface *iface) +{ + struct hwarc *hwarc = usb_get_intfdata(iface); + struct uwb_rc *uwb_rc = hwarc->uwb_rc; + + uwb_rc_pre_reset(uwb_rc); + return 0; +} + +static int hwarc_post_reset(struct usb_interface *iface) +{ + struct hwarc *hwarc = usb_get_intfdata(iface); + struct uwb_rc *uwb_rc = hwarc->uwb_rc; + + uwb_rc_post_reset(uwb_rc); + return 0; +} + /** USB device ID's that we handle */ static struct usb_device_id hwarc_id_table[] = { /* D-Link DUB-1210 */ @@ -898,20 +907,16 @@ MODULE_DEVICE_TABLE(usb, hwarc_id_table); static struct usb_driver hwarc_driver = { .name = "hwa-rc", + .id_table = hwarc_id_table, .probe = hwarc_probe, .disconnect = hwarc_disconnect, - .id_table = hwarc_id_table, + .pre_reset = hwarc_pre_reset, + .post_reset = hwarc_post_reset, }; static int __init hwarc_driver_init(void) { - int result; - result = usb_register(&hwarc_driver); - if (result < 0) - printk(KERN_ERR "HWA-RC: Cannot register USB driver: %d\n", - result); - return result; - + return usb_register(&hwarc_driver); } module_init(hwarc_driver_init); diff --git a/drivers/uwb/i1480/dfu/dfu.c b/drivers/uwb/i1480/dfu/dfu.c index 9097b3b30385..da7b1d08003c 100644 --- a/drivers/uwb/i1480/dfu/dfu.c +++ b/drivers/uwb/i1480/dfu/dfu.c @@ -34,10 +34,7 @@ #include <linux/uwb.h> #include <linux/random.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - -/** +/* * i1480_rceb_check - Check RCEB for expected field values * @i1480: pointer to device for which RCEB is being checked * @rceb: RCEB being checked @@ -83,7 +80,7 @@ int i1480_rceb_check(const struct i1480 *i1480, const struct uwb_rceb *rceb, EXPORT_SYMBOL_GPL(i1480_rceb_check); -/** +/* * Execute a Radio Control Command * * Command data has to be in i1480->cmd_buf. @@ -101,7 +98,6 @@ ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size, u8 expected_type = reply->bEventType; u8 context; - d_fnstart(3, i1480->dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size); init_completion(&i1480->evt_complete); i1480->evt_result = -EINPROGRESS; do { @@ -150,8 +146,6 @@ ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size, result = i1480_rceb_check(i1480, i1480->evt_buf, cmd_name, context, expected_type, expected_event); error: - d_fnend(3, i1480->dev, "(%p, %s, %zu) = %zd\n", - i1480, cmd_name, cmd_size, result); return result; } EXPORT_SYMBOL_GPL(i1480_cmd); diff --git a/drivers/uwb/i1480/dfu/mac.c b/drivers/uwb/i1480/dfu/mac.c index 2e4d8f07c165..694d0daf88ab 100644 --- a/drivers/uwb/i1480/dfu/mac.c +++ b/drivers/uwb/i1480/dfu/mac.c @@ -31,9 +31,6 @@ #include <linux/uwb.h> #include "i1480-dfu.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - /* * Descriptor for a continuous segment of MAC fw data */ @@ -184,10 +181,6 @@ ssize_t i1480_fw_cmp(struct i1480 *i1480, struct fw_hdr *hdr) } if (memcmp(i1480->cmd_buf, bin + src_itr, result)) { u8 *buf = i1480->cmd_buf; - d_printf(2, i1480->dev, - "original data @ %p + %u, %zu bytes\n", - bin, src_itr, result); - d_dump(4, i1480->dev, bin + src_itr, result); for (cnt = 0; cnt < result; cnt++) if (bin[src_itr + cnt] != buf[cnt]) { dev_err(i1480->dev, "byte failed at " @@ -224,7 +217,6 @@ int mac_fw_hdrs_push(struct i1480 *i1480, struct fw_hdr *hdr, struct fw_hdr *hdr_itr; int verif_retry_count; - d_fnstart(3, dev, "(%p, %p)\n", i1480, hdr); /* Now, header by header, push them to the hw */ for (hdr_itr = hdr; hdr_itr != NULL; hdr_itr = hdr_itr->next) { verif_retry_count = 0; @@ -264,7 +256,6 @@ retry: break; } } - d_fnend(3, dev, "(%zd)\n", result); return result; } @@ -337,11 +328,9 @@ int __mac_fw_upload(struct i1480 *i1480, const char *fw_name, const struct firmware *fw; struct fw_hdr *fw_hdrs; - d_fnstart(3, i1480->dev, "(%p, %s, %s)\n", i1480, fw_name, fw_tag); result = request_firmware(&fw, fw_name, i1480->dev); if (result < 0) /* Up to caller to complain on -ENOENT */ goto out; - d_printf(3, i1480->dev, "%s fw '%s': uploading\n", fw_tag, fw_name); result = fw_hdrs_load(i1480, &fw_hdrs, fw->data, fw->size); if (result < 0) { dev_err(i1480->dev, "%s fw '%s': failed to parse firmware " @@ -363,8 +352,6 @@ out_hdrs_release: out_release: release_firmware(fw); out: - d_fnend(3, i1480->dev, "(%p, %s, %s) = %d\n", i1480, fw_name, fw_tag, - result); return result; } @@ -433,7 +420,6 @@ int i1480_fw_is_running_q(struct i1480 *i1480) int result; u32 *val = (u32 *) i1480->cmd_buf; - d_fnstart(3, i1480->dev, "(i1480 %p)\n", i1480); for (cnt = 0; cnt < 10; cnt++) { msleep(100); result = i1480->read(i1480, 0x80080000, 4); @@ -447,7 +433,6 @@ int i1480_fw_is_running_q(struct i1480 *i1480) dev_err(i1480->dev, "Timed out waiting for fw to start\n"); result = -ETIMEDOUT; out: - d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result); return result; } @@ -467,7 +452,6 @@ int i1480_mac_fw_upload(struct i1480 *i1480) int result = 0, deprecated_name = 0; struct i1480_rceb *rcebe = (void *) i1480->evt_buf; - d_fnstart(3, i1480->dev, "(%p)\n", i1480); result = __mac_fw_upload(i1480, i1480->mac_fw_name, "MAC"); if (result == -ENOENT) { result = __mac_fw_upload(i1480, i1480->mac_fw_name_deprecate, @@ -501,7 +485,6 @@ int i1480_mac_fw_upload(struct i1480 *i1480) dev_err(i1480->dev, "MAC fw '%s': initialization event returns " "wrong size (%zu bytes vs %zu needed)\n", i1480->mac_fw_name, i1480->evt_result, sizeof(*rcebe)); - dump_bytes(i1480->dev, rcebe, min(i1480->evt_result, (ssize_t)32)); goto error_size; } result = -EIO; @@ -522,6 +505,5 @@ error_fw_not_running: error_init_timeout: error_size: error_setup: - d_fnend(3, i1480->dev, "(i1480 %p) = %d\n", i1480, result); return result; } diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c index 98eeeff051aa..686795e97195 100644 --- a/drivers/uwb/i1480/dfu/usb.c +++ b/drivers/uwb/i1480/dfu/usb.c @@ -35,7 +35,6 @@ * the functions are i1480_usb_NAME(). */ #include <linux/module.h> -#include <linux/version.h> #include <linux/usb.h> #include <linux/interrupt.h> #include <linux/delay.h> @@ -44,10 +43,6 @@ #include <linux/usb/wusb-wa.h> #include "i1480-dfu.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - - struct i1480_usb { struct i1480 i1480; struct usb_device *usb_dev; @@ -118,8 +113,6 @@ int i1480_usb_write(struct i1480 *i1480, u32 memory_address, struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480); size_t buffer_size, itr = 0; - d_fnstart(3, i1480->dev, "(%p, 0x%08x, %p, %zu)\n", - i1480, memory_address, buffer, size); BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */ while (size > 0) { buffer_size = size < i1480->buf_size ? size : i1480->buf_size; @@ -132,16 +125,10 @@ int i1480_usb_write(struct i1480 *i1480, u32 memory_address, i1480->cmd_buf, buffer_size, 100 /* FIXME: arbitrary */); if (result < 0) break; - d_printf(3, i1480->dev, - "wrote @ 0x%08x %u bytes (of %zu bytes requested)\n", - memory_address, result, buffer_size); - d_dump(4, i1480->dev, i1480->cmd_buf, result); itr += result; memory_address += result; size -= result; } - d_fnend(3, i1480->dev, "(%p, 0x%08x, %p, %zu) = %d\n", - i1480, memory_address, buffer, size, result); return result; } @@ -166,8 +153,6 @@ int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size) size_t itr, read_size = i1480->buf_size; struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480); - d_fnstart(3, i1480->dev, "(%p, 0x%08x, %zu)\n", - i1480, addr, size); BUG_ON(size > i1480->buf_size); BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */ BUG_ON(read_size > 512); @@ -201,10 +186,6 @@ int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size) } result = bytes; out: - d_fnend(3, i1480->dev, "(%p, 0x%08x, %zu) = %zd\n", - i1480, addr, size, result); - if (result > 0) - d_dump(4, i1480->dev, i1480->cmd_buf, result); return result; } @@ -260,7 +241,6 @@ int i1480_usb_wait_init_done(struct i1480 *i1480) struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480); struct usb_endpoint_descriptor *epd; - d_fnstart(3, dev, "(%p)\n", i1480); init_completion(&i1480->evt_complete); i1480->evt_result = -EINPROGRESS; epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc; @@ -282,14 +262,12 @@ int i1480_usb_wait_init_done(struct i1480 *i1480) goto error_wait; } usb_kill_urb(i1480_usb->neep_urb); - d_fnend(3, dev, "(%p) = 0\n", i1480); return 0; error_wait: usb_kill_urb(i1480_usb->neep_urb); error_submit: i1480->evt_result = result; - d_fnend(3, dev, "(%p) = %d\n", i1480, result); return result; } @@ -320,7 +298,6 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size) struct uwb_rccb *cmd = i1480->cmd_buf; u8 iface_no; - d_fnstart(3, dev, "(%p, %s, %zu)\n", i1480, cmd_name, cmd_size); /* Post a read on the notification & event endpoint */ iface_no = i1480_usb->usb_iface->cur_altsetting->desc.bInterfaceNumber; epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc; @@ -348,15 +325,11 @@ int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size) cmd_name, result); goto error_submit_ep0; } - d_fnend(3, dev, "(%p, %s, %zu) = %d\n", - i1480, cmd_name, cmd_size, result); return result; error_submit_ep0: usb_kill_urb(i1480_usb->neep_urb); error_submit_ep1: - d_fnend(3, dev, "(%p, %s, %zu) = %d\n", - i1480, cmd_name, cmd_size, result); return result; } diff --git a/drivers/uwb/i1480/i1480u-wlp/lc.c b/drivers/uwb/i1480/i1480u-wlp/lc.c index 737d60cd5b73..049c05d4cc6a 100644 --- a/drivers/uwb/i1480/i1480u-wlp/lc.c +++ b/drivers/uwb/i1480/i1480u-wlp/lc.c @@ -55,10 +55,9 @@ * is being removed. * i1480u_rm() */ -#include <linux/version.h> #include <linux/if_arp.h> #include <linux/etherdevice.h> -#include <linux/uwb/debug.h> + #include "i1480u-wlp.h" @@ -207,7 +206,7 @@ int i1480u_add(struct i1480u *i1480u, struct usb_interface *iface) wlp->fill_device_info = i1480u_fill_device_info; wlp->stop_queue = i1480u_stop_queue; wlp->start_queue = i1480u_start_queue; - result = wlp_setup(wlp, rc); + result = wlp_setup(wlp, rc, net_dev); if (result < 0) { dev_err(&iface->dev, "Cannot setup WLP\n"); goto error_wlp_setup; diff --git a/drivers/uwb/i1480/i1480u-wlp/netdev.c b/drivers/uwb/i1480/i1480u-wlp/netdev.c index 8802ac43d872..e3873ffb942c 100644 --- a/drivers/uwb/i1480/i1480u-wlp/netdev.c +++ b/drivers/uwb/i1480/i1480u-wlp/netdev.c @@ -41,7 +41,7 @@ #include <linux/if_arp.h> #include <linux/etherdevice.h> -#include <linux/uwb/debug.h> + #include "i1480u-wlp.h" struct i1480u_cmd_set_ip_mas { @@ -207,6 +207,11 @@ int i1480u_open(struct net_device *net_dev) result = i1480u_rx_setup(i1480u); /* Alloc RX stuff */ if (result < 0) goto error_rx_setup; + + result = uwb_radio_start(&wlp->pal); + if (result < 0) + goto error_radio_start; + netif_wake_queue(net_dev); #ifdef i1480u_FLOW_CONTROL result = usb_submit_urb(i1480u->notif_urb, GFP_KERNEL);; @@ -215,25 +220,20 @@ int i1480u_open(struct net_device *net_dev) goto error_notif_urb_submit; } #endif - i1480u->uwb_notifs_handler.cb = i1480u_uwb_notifs_cb; - i1480u->uwb_notifs_handler.data = i1480u; - if (uwb_bg_joined(rc)) - netif_carrier_on(net_dev); - else - netif_carrier_off(net_dev); - uwb_notifs_register(rc, &i1480u->uwb_notifs_handler); /* Interface is up with an address, now we can create WSS */ result = wlp_wss_setup(net_dev, &wlp->wss); if (result < 0) { dev_err(dev, "Can't create WSS: %d. \n", result); - goto error_notif_deregister; + goto error_wss_setup; } return 0; -error_notif_deregister: - uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler); +error_wss_setup: #ifdef i1480u_FLOW_CONTROL + usb_kill_urb(i1480u->notif_urb); error_notif_urb_submit: #endif + uwb_radio_stop(&wlp->pal); +error_radio_start: netif_stop_queue(net_dev); i1480u_rx_release(i1480u); error_rx_setup: @@ -248,16 +248,15 @@ int i1480u_stop(struct net_device *net_dev) { struct i1480u *i1480u = netdev_priv(net_dev); struct wlp *wlp = &i1480u->wlp; - struct uwb_rc *rc = wlp->rc; BUG_ON(wlp->rc == NULL); wlp_wss_remove(&wlp->wss); - uwb_notifs_deregister(rc, &i1480u->uwb_notifs_handler); netif_carrier_off(net_dev); #ifdef i1480u_FLOW_CONTROL usb_kill_urb(i1480u->notif_urb); #endif netif_stop_queue(net_dev); + uwb_radio_stop(&wlp->pal); i1480u_rx_release(i1480u); i1480u_tx_release(i1480u); return 0; @@ -303,34 +302,6 @@ int i1480u_change_mtu(struct net_device *net_dev, int mtu) return 0; } - -/** - * Callback function to handle events from UWB - * When we see other devices we know the carrier is ok, - * if we are the only device in the beacon group we set the carrier - * state to off. - * */ -void i1480u_uwb_notifs_cb(void *data, struct uwb_dev *uwb_dev, - enum uwb_notifs event) -{ - struct i1480u *i1480u = data; - struct net_device *net_dev = i1480u->net_dev; - struct device *dev = &i1480u->usb_iface->dev; - switch (event) { - case UWB_NOTIF_BG_JOIN: - netif_carrier_on(net_dev); - dev_info(dev, "Link is up\n"); - break; - case UWB_NOTIF_BG_LEAVE: - netif_carrier_off(net_dev); - dev_info(dev, "Link is down\n"); - break; - default: - dev_err(dev, "don't know how to handle event %d from uwb\n", - event); - } -} - /** * Stop the network queue * diff --git a/drivers/uwb/i1480/i1480u-wlp/rx.c b/drivers/uwb/i1480/i1480u-wlp/rx.c index 9fc035354a76..34f4cf9a7d34 100644 --- a/drivers/uwb/i1480/i1480u-wlp/rx.c +++ b/drivers/uwb/i1480/i1480u-wlp/rx.c @@ -68,11 +68,7 @@ #include <linux/etherdevice.h> #include "i1480u-wlp.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> - - -/** +/* * Setup the RX context * * Each URB is provided with a transfer_buffer that is the data field @@ -129,7 +125,7 @@ error: } -/** Release resources associated to the rx context */ +/* Release resources associated to the rx context */ void i1480u_rx_release(struct i1480u *i1480u) { int cnt; @@ -155,7 +151,7 @@ void i1480u_rx_unlink_urbs(struct i1480u *i1480u) } } -/** Fix an out-of-sequence packet */ +/* Fix an out-of-sequence packet */ #define i1480u_fix(i1480u, msg...) \ do { \ if (printk_ratelimit()) \ @@ -166,7 +162,7 @@ do { \ } while (0) -/** Drop an out-of-sequence packet */ +/* Drop an out-of-sequence packet */ #define i1480u_drop(i1480u, msg...) \ do { \ if (printk_ratelimit()) \ @@ -177,7 +173,7 @@ do { \ -/** Finalizes setting up the SKB and delivers it +/* Finalizes setting up the SKB and delivers it * * We first pass the incoming frame to WLP substack for verification. It * may also be a WLP association frame in which case WLP will take over the @@ -192,18 +188,11 @@ void i1480u_skb_deliver(struct i1480u *i1480u) struct net_device *net_dev = i1480u->net_dev; struct device *dev = &i1480u->usb_iface->dev; - d_printf(6, dev, "RX delivered pre skb(%p), %u bytes\n", - i1480u->rx_skb, i1480u->rx_skb->len); - d_dump(7, dev, i1480u->rx_skb->data, i1480u->rx_skb->len); should_parse = wlp_receive_frame(dev, &i1480u->wlp, i1480u->rx_skb, &i1480u->rx_srcaddr); if (!should_parse) goto out; i1480u->rx_skb->protocol = eth_type_trans(i1480u->rx_skb, net_dev); - d_printf(5, dev, "RX delivered skb(%p), %u bytes\n", - i1480u->rx_skb, i1480u->rx_skb->len); - d_dump(7, dev, i1480u->rx_skb->data, - i1480u->rx_skb->len > 72 ? 72 : i1480u->rx_skb->len); i1480u->stats.rx_packets++; i1480u->stats.rx_bytes += i1480u->rx_untd_pkt_size; net_dev->last_rx = jiffies; @@ -216,7 +205,7 @@ out: } -/** +/* * Process a buffer of data received from the USB RX endpoint * * First fragment arrives with next or last fragment. All other fragments @@ -404,7 +393,7 @@ out: } -/** +/* * Called when an RX URB has finished receiving or has found some kind * of error condition. * diff --git a/drivers/uwb/i1480/i1480u-wlp/sysfs.c b/drivers/uwb/i1480/i1480u-wlp/sysfs.c index a1d8ca6ac935..4ffaf546cc6c 100644 --- a/drivers/uwb/i1480/i1480u-wlp/sysfs.c +++ b/drivers/uwb/i1480/i1480u-wlp/sysfs.c @@ -25,8 +25,8 @@ #include <linux/netdevice.h> #include <linux/etherdevice.h> -#include <linux/uwb/debug.h> #include <linux/device.h> + #include "i1480u-wlp.h" @@ -226,7 +226,6 @@ ssize_t wlp_tx_inflight_store(struct i1480u_tx_inflight *inflight, * (CLASS_DEVICE_ATTR or DEVICE_ATTR) and i1480u_ATTR_NAME produces a * class_device_attr_NAME or device_attr_NAME (for group registration). */ -#include <linux/version.h> #define i1480u_SHOW(name, fn, param) \ static ssize_t i1480u_show_##name(struct device *dev, \ diff --git a/drivers/uwb/i1480/i1480u-wlp/tx.c b/drivers/uwb/i1480/i1480u-wlp/tx.c index 3426bfb68240..39032cc3503e 100644 --- a/drivers/uwb/i1480/i1480u-wlp/tx.c +++ b/drivers/uwb/i1480/i1480u-wlp/tx.c @@ -55,8 +55,6 @@ */ #include "i1480u-wlp.h" -#define D_LOCAL 5 -#include <linux/uwb/debug.h> enum { /* This is only for Next and Last TX packets */ @@ -64,7 +62,7 @@ enum { - sizeof(struct untd_hdr_rst), }; -/** Free resources allocated to a i1480u tx context. */ +/* Free resources allocated to a i1480u tx context. */ static void i1480u_tx_free(struct i1480u_tx *wtx) { @@ -99,7 +97,7 @@ void i1480u_tx_unlink_urbs(struct i1480u *i1480u) } -/** +/* * Callback for a completed tx USB URB. * * TODO: @@ -149,8 +147,6 @@ void i1480u_tx_cb(struct urb *urb) <= i1480u->tx_inflight.threshold && netif_queue_stopped(net_dev) && i1480u->tx_inflight.threshold != 0) { - if (d_test(2) && printk_ratelimit()) - d_printf(2, dev, "Restart queue. \n"); netif_start_queue(net_dev); atomic_inc(&i1480u->tx_inflight.restart_count); } @@ -158,7 +154,7 @@ void i1480u_tx_cb(struct urb *urb) } -/** +/* * Given a buffer that doesn't fit in a single fragment, create an * scatter/gather structure for delivery to the USB pipe. * @@ -253,15 +249,11 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb, /* Now do each remaining fragment */ result = -EINVAL; while (pl_size_left > 0) { - d_printf(5, NULL, "ITR HDR: pl_size_left %zu buf_itr %zu\n", - pl_size_left, buf_itr - wtx->buf); if (buf_itr + sizeof(*untd_hdr_rst) - wtx->buf > wtx->buf_size) { printk(KERN_ERR "BUG: no space for header\n"); goto error_bug; } - d_printf(5, NULL, "ITR HDR 2: pl_size_left %zu buf_itr %zu\n", - pl_size_left, buf_itr - wtx->buf); untd_hdr_rst = buf_itr; buf_itr += sizeof(*untd_hdr_rst); if (pl_size_left > i1480u_MAX_PL_SIZE) { @@ -271,9 +263,6 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb, frg_pl_size = pl_size_left; untd_hdr_set_type(&untd_hdr_rst->hdr, i1480u_PKT_FRAG_LST); } - d_printf(5, NULL, - "ITR PL: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n", - pl_size_left, buf_itr - wtx->buf, frg_pl_size); untd_hdr_set_rx_tx(&untd_hdr_rst->hdr, 0); untd_hdr_rst->hdr.len = cpu_to_le16(frg_pl_size); untd_hdr_rst->padding = 0; @@ -286,9 +275,6 @@ int i1480u_tx_create_n(struct i1480u_tx *wtx, struct sk_buff *skb, buf_itr += frg_pl_size; pl_itr += frg_pl_size; pl_size_left -= frg_pl_size; - d_printf(5, NULL, - "ITR PL 2: pl_size_left %zu buf_itr %zu frg_pl_size %zu\n", - pl_size_left, buf_itr - wtx->buf, frg_pl_size); } dev_kfree_skb_irq(skb); return 0; @@ -308,7 +294,7 @@ error_buf_alloc: } -/** +/* * Given a buffer that fits in a single fragment, fill out a @wtx * struct for transmitting it down the USB pipe. * @@ -346,7 +332,7 @@ int i1480u_tx_create_1(struct i1480u_tx *wtx, struct sk_buff *skb, } -/** +/* * Given a skb to transmit, massage it to become palatable for the TX pipe * * This will break the buffer in chunks smaller than @@ -425,7 +411,7 @@ error_wtx_alloc: return NULL; } -/** +/* * Actual fragmentation and transmission of frame * * @wlp: WLP substack data structure @@ -447,20 +433,12 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb, struct i1480u_tx *wtx; struct wlp_tx_hdr *wlp_tx_hdr; static unsigned char dev_bcast[2] = { 0xff, 0xff }; -#if 0 - int lockup = 50; -#endif - d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len, - net_dev); BUG_ON(i1480u->wlp.rc == NULL); if ((net_dev->flags & IFF_UP) == 0) goto out; result = -EBUSY; if (atomic_read(&i1480u->tx_inflight.count) >= i1480u->tx_inflight.max) { - if (d_test(2) && printk_ratelimit()) - d_printf(2, dev, "Max frames in flight " - "stopping queue.\n"); netif_stop_queue(net_dev); goto error_max_inflight; } @@ -489,21 +467,6 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb, wlp_tx_hdr_set_delivery_id_type(wlp_tx_hdr, i1480u->options.pca_base_priority); } -#if 0 - dev_info(dev, "TX delivering skb -> USB, %zu bytes\n", skb->len); - dump_bytes(dev, skb->data, skb->len > 72 ? 72 : skb->len); -#endif -#if 0 - /* simulates a device lockup after every lockup# packets */ - if (lockup && ((i1480u->stats.tx_packets + 1) % lockup) == 0) { - /* Simulate a dropped transmit interrupt */ - net_dev->trans_start = jiffies; - netif_stop_queue(net_dev); - dev_err(dev, "Simulate lockup at %ld\n", jiffies); - return result; - } -#endif - result = usb_submit_urb(wtx->urb, GFP_ATOMIC); /* Go baby */ if (result < 0) { dev_err(dev, "TX: cannot submit URB: %d\n", result); @@ -513,8 +476,6 @@ int i1480u_xmit_frame(struct wlp *wlp, struct sk_buff *skb, } atomic_inc(&i1480u->tx_inflight.count); net_dev->trans_start = jiffies; - d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len, - net_dev, result); return result; error_tx_urb_submit: @@ -522,13 +483,11 @@ error_tx_urb_submit: error_wtx_alloc: error_max_inflight: out: - d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len, - net_dev, result); return result; } -/** +/* * Transmit an skb Called when an skbuf has to be transmitted * * The skb is first passed to WLP substack to ensure this is a valid @@ -551,9 +510,6 @@ int i1480u_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) struct device *dev = &i1480u->usb_iface->dev; struct uwb_dev_addr dst; - d_fnstart(6, dev, "(skb %p (%u), net_dev %p)\n", skb, skb->len, - net_dev); - BUG_ON(i1480u->wlp.rc == NULL); if ((net_dev->flags & IFF_UP) == 0) goto error; result = wlp_prepare_tx_frame(dev, &i1480u->wlp, skb, &dst); @@ -562,31 +518,25 @@ int i1480u_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) "Dropping packet.\n", result); goto error; } else if (result == 1) { - d_printf(6, dev, "WLP will transmit frame. \n"); /* trans_start time will be set when WLP actually transmits * the frame */ goto out; } - d_printf(6, dev, "Transmitting frame. \n"); result = i1480u_xmit_frame(&i1480u->wlp, skb, &dst); if (result < 0) { dev_err(dev, "Frame TX failed (%d).\n", result); goto error; } - d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len, - net_dev, result); return NETDEV_TX_OK; error: dev_kfree_skb_any(skb); i1480u->stats.tx_dropped++; out: - d_fnend(6, dev, "(skb %p (%u), net_dev %p) = %d\n", skb, skb->len, - net_dev, result); return NETDEV_TX_OK; } -/** +/* * Called when a pkt transmission doesn't complete in a reasonable period * Device reset may sleep - do it outside of interrupt context (delayed) */ diff --git a/drivers/uwb/ie-rcv.c b/drivers/uwb/ie-rcv.c new file mode 100644 index 000000000000..917e6d78a798 --- /dev/null +++ b/drivers/uwb/ie-rcv.c @@ -0,0 +1,55 @@ +/* + * Ultra Wide Band + * IE Received notification handling. + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/errno.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/bitmap.h> +#include "uwb-internal.h" + +/* + * Process an incoming IE Received notification. + */ +int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt) +{ + int result = -EINVAL; + struct device *dev = &evt->rc->uwb_dev.dev; + struct uwb_rc_evt_ie_rcv *iercv; + size_t iesize; + + /* Is there enough data to decode it? */ + if (evt->notif.size < sizeof(*iercv)) { + dev_err(dev, "IE Received notification: Not enough data to " + "decode (%zu vs %zu bytes needed)\n", + evt->notif.size, sizeof(*iercv)); + goto error; + } + iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb); + iesize = le16_to_cpu(iercv->wIELength); + + dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]); + + if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) { + dev_warn(dev, "unhandled Relinquish Request IE\n"); + } + + return 0; +error: + return result; +} diff --git a/drivers/uwb/ie.c b/drivers/uwb/ie.c index cf6f3d152b9d..ab976686175b 100644 --- a/drivers/uwb/ie.c +++ b/drivers/uwb/ie.c @@ -25,8 +25,6 @@ */ #include "uwb-internal.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> /** * uwb_ie_next - get the next IE in a buffer @@ -61,6 +59,42 @@ struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len) EXPORT_SYMBOL_GPL(uwb_ie_next); /** + * uwb_ie_dump_hex - print IEs to a character buffer + * @ies: the IEs to print. + * @len: length of all the IEs. + * @buf: the destination buffer. + * @size: size of @buf. + * + * Returns the number of characters written. + */ +int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len, + char *buf, size_t size) +{ + void *ptr; + const struct uwb_ie_hdr *ie; + int r = 0; + u8 *d; + + ptr = (void *)ies; + for (;;) { + ie = uwb_ie_next(&ptr, &len); + if (!ie) + break; + + r += scnprintf(buf + r, size - r, "%02x %02x", + (unsigned)ie->element_id, + (unsigned)ie->length); + d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr); + while (d != ptr && r < size) + r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++); + if (r < size) + buf[r++] = '\n'; + }; + + return r; +} + +/** * Get the IEs that a radio controller is sending in its beacon * * @uwb_rc: UWB Radio Controller @@ -70,6 +104,7 @@ EXPORT_SYMBOL_GPL(uwb_ie_next); * anything. Once done with the iedata buffer, call * uwb_rc_ie_release(iedata). Don't call kfree on it. */ +static ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie) { ssize_t result; @@ -78,148 +113,35 @@ ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie) struct uwb_rceb *reply = NULL; struct uwb_rc_evt_get_ie *get_ie; - d_fnstart(3, dev, "(%p, %p)\n", uwb_rc, pget_ie); - result = -ENOMEM; cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); if (cmd == NULL) - goto error_kzalloc; + return -ENOMEM; + cmd->bCommandType = UWB_RC_CET_GENERAL; cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE); result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd), UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE, &reply); + kfree(cmd); if (result < 0) - goto error_cmd; + return result; + get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb); if (result < sizeof(*get_ie)) { dev_err(dev, "not enough data returned for decoding GET IE " "(%zu bytes received vs %zu needed)\n", result, sizeof(*get_ie)); - result = -EINVAL; + return -EINVAL; } else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) { dev_err(dev, "not enough data returned for decoding GET IE " "payload (%zu bytes received vs %zu needed)\n", result, sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)); - result = -EINVAL; - } else - *pget_ie = get_ie; -error_cmd: - kfree(cmd); -error_kzalloc: - d_fnend(3, dev, "(%p, %p) = %d\n", uwb_rc, pget_ie, (int)result); - return result; -} -EXPORT_SYMBOL_GPL(uwb_rc_get_ie); - - -/* - * Given a pointer to an IE, print it in ASCII/hex followed by a new line - * - * @ie_hdr: pointer to the IE header. Length is in there, and it is - * guaranteed that the ie_hdr->length bytes following it are - * safely accesible. - * - * @_data: context data passed from uwb_ie_for_each(), an struct output_ctx - */ -int uwb_ie_dump_hex(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr, - size_t offset, void *_ctx) -{ - struct uwb_buf_ctx *ctx = _ctx; - const u8 *pl = (void *)(ie_hdr + 1); - u8 pl_itr; - - ctx->bytes += scnprintf(ctx->buf + ctx->bytes, ctx->size - ctx->bytes, - "%02x %02x ", (unsigned) ie_hdr->element_id, - (unsigned) ie_hdr->length); - pl_itr = 0; - while (pl_itr < ie_hdr->length && ctx->bytes < ctx->size) - ctx->bytes += scnprintf(ctx->buf + ctx->bytes, - ctx->size - ctx->bytes, - "%02x ", (unsigned) pl[pl_itr++]); - if (ctx->bytes < ctx->size) - ctx->buf[ctx->bytes++] = '\n'; - return 0; -} -EXPORT_SYMBOL_GPL(uwb_ie_dump_hex); - - -/** - * Verify that a pointer in a buffer points to valid IE - * - * @start: pointer to start of buffer in which IE appears - * @itr: pointer to IE inside buffer that will be verified - * @top: pointer to end of buffer - * - * @returns: 0 if IE is valid, <0 otherwise - * - * Verification involves checking that the buffer can contain a - * header and the amount of data reported in the IE header can be found in - * the buffer. - */ -static -int uwb_rc_ie_verify(struct uwb_dev *uwb_dev, const void *start, - const void *itr, const void *top) -{ - struct device *dev = &uwb_dev->dev; - const struct uwb_ie_hdr *ie_hdr; - - if (top - itr < sizeof(*ie_hdr)) { - dev_err(dev, "Bad IE: no data to decode header " - "(%zu bytes left vs %zu needed) at offset %zu\n", - top - itr, sizeof(*ie_hdr), itr - start); - return -EINVAL; - } - ie_hdr = itr; - itr += sizeof(*ie_hdr); - if (top - itr < ie_hdr->length) { - dev_err(dev, "Bad IE: not enough data for payload " - "(%zu bytes left vs %zu needed) at offset %zu\n", - top - itr, (size_t)ie_hdr->length, - (void *)ie_hdr - start); return -EINVAL; } - return 0; -} - -/** - * Walk a buffer filled with consecutive IE's a buffer - * - * @uwb_dev: UWB device this IEs belong to (for err messages mainly) - * - * @fn: function to call with each IE; if it returns 0, we keep - * traversing the buffer. If it returns !0, we'll stop and return - * that value. - * - * @data: pointer passed to @fn - * - * @buf: buffer where the consecutive IEs are located - * - * @size: size of @buf - * - * Each IE is checked for basic correctness (there is space left for - * the header and the payload). If that test is failed, we stop - * processing. For every good IE, @fn is called. - */ -ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data, - const void *buf, size_t size) -{ - ssize_t result = 0; - const struct uwb_ie_hdr *ie_hdr; - const void *itr = buf, *top = itr + size; - - while (itr < top) { - if (uwb_rc_ie_verify(uwb_dev, buf, itr, top) != 0) - break; - ie_hdr = itr; - itr += sizeof(*ie_hdr) + ie_hdr->length; - result = fn(uwb_dev, ie_hdr, itr - buf, data); - if (result != 0) - break; - } + *pget_ie = get_ie; return result; } -EXPORT_SYMBOL_GPL(uwb_ie_for_each); /** @@ -256,70 +178,6 @@ error_cmd: return result; } -/** - * Determine by IE id if IE is host settable - * WUSB 1.0 [8.6.2.8 Table 8.85] - * - * EXCEPTION: - * All but UWB_IE_WLP appears in Table 8.85 from WUSB 1.0. Setting this IE - * is required for the WLP substack to perform association with its WSS so - * we hope that the WUSB spec will be changed to reflect this. - */ -static -int uwb_rc_ie_is_host_settable(enum uwb_ie element_id) -{ - if (element_id == UWB_PCA_AVAILABILITY || - element_id == UWB_BP_SWITCH_IE || - element_id == UWB_MAC_CAPABILITIES_IE || - element_id == UWB_PHY_CAPABILITIES_IE || - element_id == UWB_APP_SPEC_PROBE_IE || - element_id == UWB_IDENTIFICATION_IE || - element_id == UWB_MASTER_KEY_ID_IE || - element_id == UWB_IE_WLP || - element_id == UWB_APP_SPEC_IE) - return 1; - return 0; -} - - -/** - * Extract Host Settable IEs from IE - * - * @ie_data: pointer to buffer containing all IEs - * @size: size of buffer - * - * @returns: length of buffer that only includes host settable IEs - * - * Given a buffer of IEs we move all Host Settable IEs to front of buffer - * by overwriting the IEs that are not Host Settable. - * Buffer length is adjusted accordingly. - */ -static -ssize_t uwb_rc_parse_host_settable_ie(struct uwb_dev *uwb_dev, - void *ie_data, size_t size) -{ - size_t new_len = size; - struct uwb_ie_hdr *ie_hdr; - size_t ie_length; - void *itr = ie_data, *top = itr + size; - - while (itr < top) { - if (uwb_rc_ie_verify(uwb_dev, ie_data, itr, top) != 0) - break; - ie_hdr = itr; - ie_length = sizeof(*ie_hdr) + ie_hdr->length; - if (uwb_rc_ie_is_host_settable(ie_hdr->element_id)) { - itr += ie_length; - } else { - memmove(itr, itr + ie_length, top - (itr + ie_length)); - new_len -= ie_length; - top -= ie_length; - } - } - return new_len; -} - - /* Cleanup the whole IE management subsystem */ void uwb_rc_ie_init(struct uwb_rc *uwb_rc) { @@ -328,49 +186,34 @@ void uwb_rc_ie_init(struct uwb_rc *uwb_rc) /** - * Set up cache for host settable IEs currently being transmitted + * uwb_rc_ie_setup - setup a radio controller's IE manager + * @uwb_rc: the radio controller. * - * First we just call GET-IE to get the current IEs being transmitted - * (or we workaround and pretend we did) and (because the format is - * the same) reuse that as the IE cache (with the command prefix, as - * explained in 'struct uwb_rc'). + * The current set of IEs are obtained from the hardware with a GET-IE + * command (since the radio controller is not yet beaconing this will + * be just the hardware's MAC and PHY Capability IEs). * - * @returns: size of cache created + * Returns 0 on success; -ve on an error. */ -ssize_t uwb_rc_ie_setup(struct uwb_rc *uwb_rc) +int uwb_rc_ie_setup(struct uwb_rc *uwb_rc) { - struct device *dev = &uwb_rc->uwb_dev.dev; - ssize_t result; - size_t capacity; - struct uwb_rc_evt_get_ie *ie_info; + struct uwb_rc_evt_get_ie *ie_info = NULL; + int capacity; + + capacity = uwb_rc_get_ie(uwb_rc, &ie_info); + if (capacity < 0) + return capacity; - d_fnstart(3, dev, "(%p)\n", uwb_rc); mutex_lock(&uwb_rc->ies_mutex); - result = uwb_rc_get_ie(uwb_rc, &ie_info); - if (result < 0) - goto error_get_ie; - capacity = result; - d_printf(5, dev, "Got IEs %zu bytes (%zu long at %p)\n", result, - (size_t)le16_to_cpu(ie_info->wIELength), ie_info); - - /* Remove IEs that host should not set. */ - result = uwb_rc_parse_host_settable_ie(&uwb_rc->uwb_dev, - ie_info->IEData, le16_to_cpu(ie_info->wIELength)); - if (result < 0) - goto error_parse; - d_printf(5, dev, "purged non-settable IEs to %zu bytes\n", result); - uwb_rc->ies = (void *) ie_info; + + uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info; uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL; uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE); uwb_rc->ies_capacity = capacity; - d_printf(5, dev, "IE cache at %p %zu bytes, %zu capacity\n", - ie_info, result, capacity); - result = 0; -error_parse: -error_get_ie: + mutex_unlock(&uwb_rc->ies_mutex); - d_fnend(3, dev, "(%p) = %zu\n", uwb_rc, result); - return result; + + return 0; } @@ -383,26 +226,47 @@ void uwb_rc_ie_release(struct uwb_rc *uwb_rc) } -static -int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr, - size_t offset, void *_ctx) +static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie) { - size_t *acc_size = _ctx; - *acc_size += sizeof(*ie_hdr) + ie_hdr->length; - d_printf(6, &uwb_dev->dev, "new acc size %zu\n", *acc_size); + struct uwb_rc_cmd_set_ie *new_ies; + void *ptr, *prev_ie; + struct uwb_ie_hdr *ie; + size_t length, new_ie_len, new_capacity, size, prev_size; + + length = le16_to_cpu(rc->ies->wIELength); + new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length; + new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len; + + if (new_capacity > rc->ies_capacity) { + new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL); + if (!new_ies) + return -ENOMEM; + rc->ies = new_ies; + } + + ptr = rc->ies->IEData; + size = length; + for (;;) { + prev_ie = ptr; + prev_size = size; + ie = uwb_ie_next(&ptr, &size); + if (!ie || ie->element_id > new_ie->element_id) + break; + } + + memmove(prev_ie + new_ie_len, prev_ie, prev_size); + memcpy(prev_ie, new_ie, new_ie_len); + rc->ies->wIELength = cpu_to_le16(length + new_ie_len); + return 0; } - /** - * Add a new IE to IEs currently being transmitted by device - * + * uwb_rc_ie_add - add new IEs to the radio controller's beacon + * @uwb_rc: the radio controller. * @ies: the buffer containing the new IE or IEs to be added to - * the device's beacon. The buffer will be verified for - * consistence (meaning the headers should be right) and - * consistent with the buffer size. - * @size: size of @ies (in bytes, total buffer size) - * @returns: 0 if ok, <0 errno code on error + * the device's beacon. + * @size: length of all the IEs. * * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB * after the device sent the first beacon that includes the IEs specified @@ -411,66 +275,40 @@ int __acc_size(struct uwb_dev *uwb_dev, const struct uwb_ie_hdr *ie_hdr, * we start beaconing. * * Setting an IE on the device will overwrite all current IEs in device. So - * we take the current IEs being transmitted by the device, append the + * we take the current IEs being transmitted by the device, insert the * new one, and call SET IE with all the IEs needed. * - * The local IE cache will only be updated with the new IE if SET IE - * completed successfully. + * Returns 0 on success; or -ENOMEM. */ int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size) { int result = 0; - struct device *dev = &uwb_rc->uwb_dev.dev; - struct uwb_rc_cmd_set_ie *new_ies; - size_t ies_size, total_size, acc_size = 0; - - if (uwb_rc->ies == NULL) - return -ESHUTDOWN; - uwb_ie_for_each(&uwb_rc->uwb_dev, __acc_size, &acc_size, ies, size); - if (acc_size != size) { - dev_err(dev, "BUG: bad IEs, misconstructed headers " - "[%zu bytes reported vs %zu calculated]\n", - size, acc_size); - WARN_ON(1); - return -EINVAL; - } + void *ptr; + const struct uwb_ie_hdr *ie; + mutex_lock(&uwb_rc->ies_mutex); - ies_size = le16_to_cpu(uwb_rc->ies->wIELength); - total_size = sizeof(*uwb_rc->ies) + ies_size; - if (total_size + size > uwb_rc->ies_capacity) { - d_printf(4, dev, "Reallocating IE cache from %p capacity %zu " - "to capacity %zu\n", uwb_rc->ies, uwb_rc->ies_capacity, - total_size + size); - new_ies = kzalloc(total_size + size, GFP_KERNEL); - if (new_ies == NULL) { - dev_err(dev, "No memory for adding new IE\n"); - result = -ENOMEM; - goto error_alloc; - } - memcpy(new_ies, uwb_rc->ies, total_size); - uwb_rc->ies_capacity = total_size + size; - kfree(uwb_rc->ies); - uwb_rc->ies = new_ies; - d_printf(4, dev, "New IE cache at %p capacity %zu\n", - uwb_rc->ies, uwb_rc->ies_capacity); + + ptr = (void *)ies; + for (;;) { + ie = uwb_ie_next(&ptr, &size); + if (!ie) + break; + + result = uwb_rc_ie_add_one(uwb_rc, ie); + if (result < 0) + break; } - memcpy((void *)uwb_rc->ies + total_size, ies, size); - uwb_rc->ies->wIELength = cpu_to_le16(ies_size + size); - if (uwb_rc->beaconing != -1) { - result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies); - if (result < 0) { - dev_err(dev, "Cannot set new IE on device: %d\n", - result); - uwb_rc->ies->wIELength = cpu_to_le16(ies_size); + if (result >= 0) { + if (size == 0) { + if (uwb_rc->beaconing != -1) + result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies); } else - result = 0; + result = -EINVAL; } - d_printf(4, dev, "IEs now occupy %hu bytes of %zu capacity at %p\n", - le16_to_cpu(uwb_rc->ies->wIELength), uwb_rc->ies_capacity, - uwb_rc->ies); -error_alloc: + mutex_unlock(&uwb_rc->ies_mutex); + return result; } EXPORT_SYMBOL_GPL(uwb_rc_ie_add); @@ -489,53 +327,52 @@ EXPORT_SYMBOL_GPL(uwb_rc_ie_add); * beacon. We don't reallocate, we just mark the size smaller. */ static -int uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove) +void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove) { - struct uwb_ie_hdr *ie_hdr; - size_t new_len = le16_to_cpu(uwb_rc->ies->wIELength); - void *itr = uwb_rc->ies->IEData; - void *top = itr + new_len; - - while (itr < top) { - ie_hdr = itr; - if (ie_hdr->element_id != to_remove) { - itr += sizeof(*ie_hdr) + ie_hdr->length; - } else { - int ie_length; - ie_length = sizeof(*ie_hdr) + ie_hdr->length; - if (top - itr != ie_length) - memmove(itr, itr + ie_length, top - itr + ie_length); - top -= ie_length; - new_len -= ie_length; + struct uwb_ie_hdr *ie; + size_t len = le16_to_cpu(uwb_rc->ies->wIELength); + void *ptr; + size_t size; + + ptr = uwb_rc->ies->IEData; + size = len; + for (;;) { + ie = uwb_ie_next(&ptr, &size); + if (!ie) + break; + if (ie->element_id == to_remove) { + len -= sizeof(struct uwb_ie_hdr) + ie->length; + memmove(ie, ptr, size); + ptr = ie; } } - uwb_rc->ies->wIELength = cpu_to_le16(new_len); - return 0; + uwb_rc->ies->wIELength = cpu_to_le16(len); } /** - * Remove an IE currently being transmitted by device + * uwb_rc_ie_rm - remove an IE from the radio controller's beacon + * @uwb_rc: the radio controller. + * @element_id: the element ID of the IE to remove. * - * @element_id: id of IE to be removed from device's beacon + * Only IEs previously added with uwb_rc_ie_add() may be removed. + * + * Returns 0 on success; or -ve the SET-IE command to the radio + * controller failed. */ int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id) { - struct device *dev = &uwb_rc->uwb_dev.dev; - int result; + int result = 0; - if (uwb_rc->ies == NULL) - return -ESHUTDOWN; mutex_lock(&uwb_rc->ies_mutex); - result = uwb_rc_ie_cache_rm(uwb_rc, element_id); - if (result < 0) - dev_err(dev, "Cannot remove IE from cache.\n"); - if (uwb_rc->beaconing != -1) { + + uwb_rc_ie_cache_rm(uwb_rc, element_id); + + if (uwb_rc->beaconing != -1) result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies); - if (result < 0) - dev_err(dev, "Cannot set new IE on device.\n"); - } + mutex_unlock(&uwb_rc->ies_mutex); + return result; } EXPORT_SYMBOL_GPL(uwb_rc_ie_rm); diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c index 15f856c9689a..e9fe1bb7eb23 100644 --- a/drivers/uwb/lc-dev.c +++ b/drivers/uwb/lc-dev.c @@ -22,7 +22,6 @@ * * FIXME: docs */ - #include <linux/kernel.h> #include <linux/device.h> #include <linux/err.h> @@ -30,10 +29,6 @@ #include <linux/random.h> #include "uwb-internal.h" -#define D_LOCAL 1 -#include <linux/uwb/debug.h> - - /* We initialize addresses to 0xff (invalid, as it is bcast) */ static inline void uwb_dev_addr_init(struct uwb_dev_addr *addr) { @@ -104,12 +99,9 @@ static void uwb_dev_sys_release(struct device *dev) { struct uwb_dev *uwb_dev = to_uwb_dev(dev); - d_fnstart(4, NULL, "(dev %p uwb_dev %p)\n", dev, uwb_dev); uwb_bce_put(uwb_dev->bce); - d_printf(0, &uwb_dev->dev, "uwb_dev %p freed\n", uwb_dev); memset(uwb_dev, 0x69, sizeof(*uwb_dev)); kfree(uwb_dev); - d_fnend(4, NULL, "(dev %p uwb_dev %p) = void\n", dev, uwb_dev); } /* @@ -275,12 +267,8 @@ static struct attribute_group *groups[] = { */ static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev) { - int result; struct device *dev; - d_fnstart(4, NULL, "(uwb_dev %p parent_dev %p)\n", uwb_dev, parent_dev); - BUG_ON(parent_dev == NULL); - dev = &uwb_dev->dev; /* Device sysfs files are only useful for neighbor devices not local radio controllers. */ @@ -289,18 +277,14 @@ static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev) dev->parent = parent_dev; dev_set_drvdata(dev, uwb_dev); - result = device_add(dev); - d_fnend(4, NULL, "(uwb_dev %p parent_dev %p) = %d\n", uwb_dev, parent_dev, result); - return result; + return device_add(dev); } static void __uwb_dev_sys_rm(struct uwb_dev *uwb_dev) { - d_fnstart(4, NULL, "(uwb_dev %p)\n", uwb_dev); dev_set_drvdata(&uwb_dev->dev, NULL); device_del(&uwb_dev->dev); - d_fnend(4, NULL, "(uwb_dev %p) = void\n", uwb_dev); } @@ -384,7 +368,6 @@ int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc) struct device *dev = &uwb_dev->dev; char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE]; - d_fnstart(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p)\n", dev, uwb_dev, rc); uwb_mac_addr_print(macbuf, sizeof(macbuf), &uwb_dev->mac_addr); uwb_dev_addr_print(devbuf, sizeof(devbuf), &uwb_dev->dev_addr); dev_info(dev, "uwb device (mac %s dev %s) disconnected from %s %s\n", @@ -392,8 +375,10 @@ int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc) rc ? rc->uwb_dev.dev.parent->bus->name : "n/a", rc ? dev_name(rc->uwb_dev.dev.parent) : ""); uwb_dev_rm(uwb_dev); + list_del(&uwb_dev->bce->node); + uwb_bce_put(uwb_dev->bce); uwb_dev_put(uwb_dev); /* for the creation in _onair() */ - d_fnend(3, NULL, "(dev %p [uwb_dev %p], uwb_rc %p) = 0\n", dev, uwb_dev, rc); + return 0; } diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c index ee5772f00d42..9cf21e6bb624 100644 --- a/drivers/uwb/lc-rc.c +++ b/drivers/uwb/lc-rc.c @@ -36,8 +36,6 @@ #include <linux/etherdevice.h> #include <linux/usb.h> -#define D_LOCAL 1 -#include <linux/uwb/debug.h> #include "uwb-internal.h" static int uwb_rc_index_match(struct device *dev, void *data) @@ -81,9 +79,7 @@ static void uwb_rc_sys_release(struct device *dev) struct uwb_dev *uwb_dev = container_of(dev, struct uwb_dev, dev); struct uwb_rc *rc = container_of(uwb_dev, struct uwb_rc, uwb_dev); - uwb_rc_neh_destroy(rc); uwb_rc_ie_release(rc); - d_printf(1, dev, "freed uwb_rc %p\n", rc); kfree(rc); } @@ -100,6 +96,8 @@ void uwb_rc_init(struct uwb_rc *rc) rc->scan_type = UWB_SCAN_DISABLED; INIT_LIST_HEAD(&rc->notifs_chain.list); mutex_init(&rc->notifs_chain.mutex); + INIT_LIST_HEAD(&rc->uwb_beca.list); + mutex_init(&rc->uwb_beca.mutex); uwb_drp_avail_init(rc); uwb_rc_ie_init(rc); uwb_rsv_init(rc); @@ -191,9 +189,9 @@ static int uwb_rc_setup(struct uwb_rc *rc) int result; struct device *dev = &rc->uwb_dev.dev; - result = uwb_rc_reset(rc); + result = uwb_radio_setup(rc); if (result < 0) { - dev_err(dev, "cannot reset UWB radio: %d\n", result); + dev_err(dev, "cannot setup UWB radio: %d\n", result); goto error; } result = uwb_rc_mac_addr_setup(rc); @@ -250,6 +248,12 @@ int uwb_rc_add(struct uwb_rc *rc, struct device *parent_dev, void *priv) rc->priv = priv; + init_waitqueue_head(&rc->uwbd.wq); + INIT_LIST_HEAD(&rc->uwbd.event_list); + spin_lock_init(&rc->uwbd.event_list_lock); + + uwbd_start(rc); + result = rc->start(rc); if (result < 0) goto error_rc_start; @@ -284,7 +288,7 @@ error_sys_add: error_dev_add: error_rc_setup: rc->stop(rc); - uwbd_flush(rc); + uwbd_stop(rc); error_rc_start: return result; } @@ -306,25 +310,24 @@ void uwb_rc_rm(struct uwb_rc *rc) rc->ready = 0; uwb_dbg_del_rc(rc); - uwb_rsv_cleanup(rc); - uwb_rc_ie_rm(rc, UWB_IDENTIFICATION_IE); - if (rc->beaconing >= 0) - uwb_rc_beacon(rc, -1, 0); - if (rc->scan_type != UWB_SCAN_DISABLED) - uwb_rc_scan(rc, rc->scanning, UWB_SCAN_DISABLED, 0); - uwb_rc_reset(rc); + uwb_rsv_remove_all(rc); + uwb_radio_shutdown(rc); rc->stop(rc); - uwbd_flush(rc); + + uwbd_stop(rc); + uwb_rc_neh_destroy(rc); uwb_dev_lock(&rc->uwb_dev); rc->priv = NULL; rc->cmd = NULL; uwb_dev_unlock(&rc->uwb_dev); - mutex_lock(&uwb_beca.mutex); + mutex_lock(&rc->uwb_beca.mutex); uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL); __uwb_rc_sys_rm(rc); - mutex_unlock(&uwb_beca.mutex); + mutex_unlock(&rc->uwb_beca.mutex); + uwb_rsv_cleanup(rc); + uwb_beca_release(rc); uwb_dev_rm(&rc->uwb_dev); } EXPORT_SYMBOL_GPL(uwb_rc_rm); @@ -468,28 +471,3 @@ void uwb_rc_put(struct uwb_rc *rc) __uwb_rc_put(rc); } EXPORT_SYMBOL_GPL(uwb_rc_put); - -/* - * - * - */ -ssize_t uwb_rc_print_IEs(struct uwb_rc *uwb_rc, char *buf, size_t size) -{ - ssize_t result; - struct uwb_rc_evt_get_ie *ie_info; - struct uwb_buf_ctx ctx; - - result = uwb_rc_get_ie(uwb_rc, &ie_info); - if (result < 0) - goto error_get_ie; - ctx.buf = buf; - ctx.size = size; - ctx.bytes = 0; - uwb_ie_for_each(&uwb_rc->uwb_dev, uwb_ie_dump_hex, &ctx, - ie_info->IEData, result - sizeof(*ie_info)); - result = ctx.bytes; - kfree(ie_info); -error_get_ie: - return result; -} - diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c index 9b4eb64327ac..0af8916d9bef 100644 --- a/drivers/uwb/neh.c +++ b/drivers/uwb/neh.c @@ -86,8 +86,6 @@ #include <linux/err.h> #include "uwb-internal.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> /* * UWB Radio Controller Notification/Event Handle @@ -254,7 +252,6 @@ error_kzalloc: static void __uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh) { - del_timer(&neh->timer); __uwb_rc_ctx_put(rc, neh); list_del(&neh->list_node); } @@ -275,6 +272,7 @@ void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh) __uwb_rc_neh_rm(rc, neh); spin_unlock_irqrestore(&rc->neh_lock, flags); + del_timer_sync(&neh->timer); uwb_rc_neh_put(neh); } @@ -349,7 +347,7 @@ struct uwb_rc_neh *uwb_rc_neh_lookup(struct uwb_rc *rc, } -/** +/* * Process notifications coming from the radio control interface * * @rc: UWB Radio Control Interface descriptor @@ -401,23 +399,6 @@ void uwb_rc_notif(struct uwb_rc *rc, struct uwb_rceb *rceb, ssize_t size) uwb_evt->notif.size = size; uwb_evt->notif.rceb = rceb; - switch (le16_to_cpu(rceb->wEvent)) { - /* Trap some vendor specific events - * - * FIXME: move this to handling in ptc-est, where we - * register a NULL event handler for these two guys - * using the Intel IDs. - */ - case 0x0103: - dev_info(dev, "FIXME: DEVICE ADD\n"); - return; - case 0x0104: - dev_info(dev, "FIXME: DEVICE RM\n"); - return; - default: - break; - } - uwbd_event_queue(uwb_evt); } @@ -438,9 +419,10 @@ static void uwb_rc_neh_grok_event(struct uwb_rc *rc, struct uwb_rceb *rceb, size rceb->bEventContext, size); } else { neh = uwb_rc_neh_lookup(rc, rceb); - if (neh) + if (neh) { + del_timer_sync(&neh->timer); uwb_rc_neh_cb(neh, rceb, size); - else + } else dev_warn(dev, "event 0x%02x/%04x/%02x (%zu bytes): nobody cared\n", rceb->bEventType, le16_to_cpu(rceb->wEvent), rceb->bEventContext, size); @@ -495,8 +477,6 @@ void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size) size_t size, real_size, event_size; int needtofree; - d_fnstart(3, dev, "(rc %p buf %p %zu buf_size)\n", rc, buf, buf_size); - d_printf(2, dev, "groking event block: %zu bytes\n", buf_size); itr = buf; size = buf_size; while (size > 0) { @@ -544,10 +524,7 @@ void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size) itr += real_size; size -= real_size; - d_printf(2, dev, "consumed %zd bytes, %zu left\n", - event_size, size); } - d_fnend(3, dev, "(rc %p buf %p %zu buf_size) = void\n", rc, buf, buf_size); } EXPORT_SYMBOL_GPL(uwb_rc_neh_grok); @@ -562,16 +539,22 @@ EXPORT_SYMBOL_GPL(uwb_rc_neh_grok); */ void uwb_rc_neh_error(struct uwb_rc *rc, int error) { - struct uwb_rc_neh *neh, *next; + struct uwb_rc_neh *neh; unsigned long flags; - BUG_ON(error >= 0); - spin_lock_irqsave(&rc->neh_lock, flags); - list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) { + for (;;) { + spin_lock_irqsave(&rc->neh_lock, flags); + if (list_empty(&rc->neh_list)) { + spin_unlock_irqrestore(&rc->neh_lock, flags); + break; + } + neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node); __uwb_rc_neh_rm(rc, neh); + spin_unlock_irqrestore(&rc->neh_lock, flags); + + del_timer_sync(&neh->timer); uwb_rc_neh_cb(neh, NULL, error); } - spin_unlock_irqrestore(&rc->neh_lock, flags); } EXPORT_SYMBOL_GPL(uwb_rc_neh_error); @@ -583,10 +566,14 @@ static void uwb_rc_neh_timer(unsigned long arg) unsigned long flags; spin_lock_irqsave(&rc->neh_lock, flags); - __uwb_rc_neh_rm(rc, neh); + if (neh->context) + __uwb_rc_neh_rm(rc, neh); + else + neh = NULL; spin_unlock_irqrestore(&rc->neh_lock, flags); - uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT); + if (neh) + uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT); } /** Initializes the @rc's neh subsystem @@ -605,12 +592,19 @@ void uwb_rc_neh_create(struct uwb_rc *rc) void uwb_rc_neh_destroy(struct uwb_rc *rc) { unsigned long flags; - struct uwb_rc_neh *neh, *next; + struct uwb_rc_neh *neh; - spin_lock_irqsave(&rc->neh_lock, flags); - list_for_each_entry_safe(neh, next, &rc->neh_list, list_node) { + for (;;) { + spin_lock_irqsave(&rc->neh_lock, flags); + if (list_empty(&rc->neh_list)) { + spin_unlock_irqrestore(&rc->neh_lock, flags); + break; + } + neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node); __uwb_rc_neh_rm(rc, neh); + spin_unlock_irqrestore(&rc->neh_lock, flags); + + del_timer_sync(&neh->timer); uwb_rc_neh_put(neh); } - spin_unlock_irqrestore(&rc->neh_lock, flags); } diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c index 1afb38eacb9a..99a19c199095 100644 --- a/drivers/uwb/pal.c +++ b/drivers/uwb/pal.c @@ -16,6 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/kernel.h> +#include <linux/debugfs.h> #include <linux/uwb.h> #include "uwb-internal.h" @@ -32,13 +33,13 @@ EXPORT_SYMBOL_GPL(uwb_pal_init); /** * uwb_pal_register - register a UWB PAL - * @rc: the radio controller the PAL will be using * @pal: the PAL * * The PAL must be initialized with uwb_pal_init(). */ -int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) +int uwb_pal_register(struct uwb_pal *pal) { + struct uwb_rc *rc = pal->rc; int ret; if (pal->device) { @@ -54,9 +55,11 @@ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) } } - spin_lock(&rc->pal_lock); + pal->debugfs_dir = uwb_dbg_create_pal_dir(pal); + + mutex_lock(&rc->uwb_dev.mutex); list_add(&pal->node, &rc->pals); - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); return 0; } @@ -64,14 +67,19 @@ EXPORT_SYMBOL_GPL(uwb_pal_register); /** * uwb_pal_register - unregister a UWB PAL - * @rc: the radio controller the PAL was using * @pal: the PAL */ -void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal) +void uwb_pal_unregister(struct uwb_pal *pal) { - spin_lock(&rc->pal_lock); + struct uwb_rc *rc = pal->rc; + + uwb_radio_stop(pal); + + mutex_lock(&rc->uwb_dev.mutex); list_del(&pal->node); - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); + + debugfs_remove(pal->debugfs_dir); if (pal->device) { sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name); @@ -86,6 +94,5 @@ EXPORT_SYMBOL_GPL(uwb_pal_unregister); */ void uwb_rc_pal_init(struct uwb_rc *rc) { - spin_lock_init(&rc->pal_lock); INIT_LIST_HEAD(&rc->pals); } diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c new file mode 100644 index 000000000000..f0d55495f5e9 --- /dev/null +++ b/drivers/uwb/radio.c @@ -0,0 +1,202 @@ +/* + * UWB radio (channel) management. + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/uwb.h> + +#include "uwb-internal.h" + + +static int uwb_radio_select_channel(struct uwb_rc *rc) +{ + /* + * Default to channel 9 (BG1, TFC1) unless the user has + * selected a specific channel or there are no active PALs. + */ + if (rc->active_pals == 0) + return -1; + if (rc->beaconing_forced) + return rc->beaconing_forced; + return 9; +} + + +/* + * Notify all active PALs that the channel has changed. + */ +static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel) +{ + struct uwb_pal *pal; + + list_for_each_entry(pal, &rc->pals, node) { + if (pal->channel && channel != pal->channel) { + pal->channel = channel; + if (pal->channel_changed) + pal->channel_changed(pal, pal->channel); + } + } +} + +/* + * Change to a new channel and notify any active PALs of the new + * channel. + * + * When stopping the radio, PALs need to be notified first so they can + * terminate any active reservations. + */ +static int uwb_radio_change_channel(struct uwb_rc *rc, int channel) +{ + int ret = 0; + + if (channel == -1) + uwb_radio_channel_changed(rc, channel); + + if (channel != rc->beaconing) { + if (rc->beaconing != -1 && channel != -1) { + /* + * FIXME: should signal the channel change + * with a Channel Change IE. + */ + ret = uwb_radio_change_channel(rc, -1); + if (ret < 0) + return ret; + } + ret = uwb_rc_beacon(rc, channel, 0); + } + + if (channel != -1) + uwb_radio_channel_changed(rc, rc->beaconing); + + return ret; +} + +/** + * uwb_radio_start - request that the radio be started + * @pal: the PAL making the request. + * + * If the radio is not already active, aa suitable channel is selected + * and beacons are started. + */ +int uwb_radio_start(struct uwb_pal *pal) +{ + struct uwb_rc *rc = pal->rc; + int ret = 0; + + mutex_lock(&rc->uwb_dev.mutex); + + if (!pal->channel) { + pal->channel = -1; + rc->active_pals++; + ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + } + + mutex_unlock(&rc->uwb_dev.mutex); + return ret; +} +EXPORT_SYMBOL_GPL(uwb_radio_start); + +/** + * uwb_radio_stop - request tha the radio be stopped. + * @pal: the PAL making the request. + * + * Stops the radio if no other PAL is making use of it. + */ +void uwb_radio_stop(struct uwb_pal *pal) +{ + struct uwb_rc *rc = pal->rc; + + mutex_lock(&rc->uwb_dev.mutex); + + if (pal->channel) { + rc->active_pals--; + uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + pal->channel = 0; + } + + mutex_unlock(&rc->uwb_dev.mutex); +} +EXPORT_SYMBOL_GPL(uwb_radio_stop); + +/* + * uwb_radio_force_channel - force a specific channel to be used + * @rc: the radio controller. + * @channel: the channel to use; -1 to force the radio to stop; 0 to + * use the default channel selection algorithm. + */ +int uwb_radio_force_channel(struct uwb_rc *rc, int channel) +{ + int ret = 0; + + mutex_lock(&rc->uwb_dev.mutex); + + rc->beaconing_forced = channel; + ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc)); + + mutex_unlock(&rc->uwb_dev.mutex); + return ret; +} + +/* + * uwb_radio_setup - setup the radio manager + * @rc: the radio controller. + * + * The radio controller is reset to ensure it's in a known state + * before it's used. + */ +int uwb_radio_setup(struct uwb_rc *rc) +{ + return uwb_rc_reset(rc); +} + +/* + * uwb_radio_reset_state - reset any radio manager state + * @rc: the radio controller. + * + * All internal radio manager state is reset to values corresponding + * to a reset radio controller. + */ +void uwb_radio_reset_state(struct uwb_rc *rc) +{ + struct uwb_pal *pal; + + mutex_lock(&rc->uwb_dev.mutex); + + list_for_each_entry(pal, &rc->pals, node) { + if (pal->channel) { + pal->channel = -1; + if (pal->channel_changed) + pal->channel_changed(pal, -1); + } + } + + rc->beaconing = -1; + rc->scanning = -1; + + mutex_unlock(&rc->uwb_dev.mutex); +} + +/* + * uwb_radio_shutdown - shutdown the radio manager + * @rc: the radio controller. + * + * The radio controller is reset. + */ +void uwb_radio_shutdown(struct uwb_rc *rc) +{ + uwb_radio_reset_state(rc); + uwb_rc_reset(rc); +} diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index 8de856fa7958..70f8050221ff 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -32,8 +32,6 @@ #include <linux/err.h> #include "uwb-internal.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> /** * Command result codes (WUSB1.0[T8-69]) @@ -323,17 +321,16 @@ int uwbd_msg_handle_reset(struct uwb_event *evt) struct uwb_rc *rc = evt->rc; int ret; - /* Need to prevent the RC hardware module going away while in - the rc->reset() call. */ - if (!try_module_get(rc->owner)) - return 0; - dev_info(&rc->uwb_dev.dev, "resetting radio controller\n"); ret = rc->reset(rc); - if (ret) + if (ret) { dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret); - - module_put(rc->owner); + goto error; + } + return 0; +error: + /* Nothing can be done except try the reset again. */ + uwb_rc_reset_all(rc); return ret; } @@ -360,3 +357,33 @@ void uwb_rc_reset_all(struct uwb_rc *rc) uwbd_event_queue(evt); } EXPORT_SYMBOL_GPL(uwb_rc_reset_all); + +void uwb_rc_pre_reset(struct uwb_rc *rc) +{ + rc->stop(rc); + uwbd_flush(rc); + + uwb_radio_reset_state(rc); + uwb_rsv_remove_all(rc); +} +EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); + +void uwb_rc_post_reset(struct uwb_rc *rc) +{ + int ret; + + ret = rc->start(rc); + if (ret) + goto error; + ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr); + if (ret) + goto error; + ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr); + if (ret) + goto error; + return; +error: + /* Nothing can be done except try the reset again. */ + uwb_rc_reset_all(rc); +} +EXPORT_SYMBOL_GPL(uwb_rc_post_reset); diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c index bae16204576d..ec6eecb32f30 100644 --- a/drivers/uwb/rsv.c +++ b/drivers/uwb/rsv.c @@ -15,23 +15,33 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/uwb.h> +#include <linux/random.h> #include "uwb-internal.h" static void uwb_rsv_timer(unsigned long arg); static const char *rsv_states[] = { - [UWB_RSV_STATE_NONE] = "none", - [UWB_RSV_STATE_O_INITIATED] = "initiated", - [UWB_RSV_STATE_O_PENDING] = "pending", - [UWB_RSV_STATE_O_MODIFIED] = "modified", - [UWB_RSV_STATE_O_ESTABLISHED] = "established", - [UWB_RSV_STATE_T_ACCEPTED] = "accepted", - [UWB_RSV_STATE_T_DENIED] = "denied", - [UWB_RSV_STATE_T_PENDING] = "pending", + [UWB_RSV_STATE_NONE] = "none ", + [UWB_RSV_STATE_O_INITIATED] = "o initiated ", + [UWB_RSV_STATE_O_PENDING] = "o pending ", + [UWB_RSV_STATE_O_MODIFIED] = "o modified ", + [UWB_RSV_STATE_O_ESTABLISHED] = "o established ", + [UWB_RSV_STATE_O_TO_BE_MOVED] = "o to be moved ", + [UWB_RSV_STATE_O_MOVE_EXPANDING] = "o move expanding", + [UWB_RSV_STATE_O_MOVE_COMBINING] = "o move combining", + [UWB_RSV_STATE_O_MOVE_REDUCING] = "o move reducing ", + [UWB_RSV_STATE_T_ACCEPTED] = "t accepted ", + [UWB_RSV_STATE_T_CONFLICT] = "t conflict ", + [UWB_RSV_STATE_T_PENDING] = "t pending ", + [UWB_RSV_STATE_T_DENIED] = "t denied ", + [UWB_RSV_STATE_T_RESIZED] = "t resized ", + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ", + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf", + [UWB_RSV_STATE_T_EXPANDING_PENDING] = "t expanding pend", + [UWB_RSV_STATE_T_EXPANDING_DENIED] = "t expanding den ", }; static const char *rsv_types[] = { @@ -42,6 +52,31 @@ static const char *rsv_types[] = { [UWB_DRP_TYPE_PCA] = "pca", }; +bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv) +{ + static const bool has_two_drp_ies[] = { + [UWB_RSV_STATE_O_INITIATED] = false, + [UWB_RSV_STATE_O_PENDING] = false, + [UWB_RSV_STATE_O_MODIFIED] = false, + [UWB_RSV_STATE_O_ESTABLISHED] = false, + [UWB_RSV_STATE_O_TO_BE_MOVED] = false, + [UWB_RSV_STATE_O_MOVE_COMBINING] = false, + [UWB_RSV_STATE_O_MOVE_REDUCING] = false, + [UWB_RSV_STATE_O_MOVE_EXPANDING] = true, + [UWB_RSV_STATE_T_ACCEPTED] = false, + [UWB_RSV_STATE_T_CONFLICT] = false, + [UWB_RSV_STATE_T_PENDING] = false, + [UWB_RSV_STATE_T_DENIED] = false, + [UWB_RSV_STATE_T_RESIZED] = false, + [UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = true, + [UWB_RSV_STATE_T_EXPANDING_CONFLICT] = true, + [UWB_RSV_STATE_T_EXPANDING_PENDING] = true, + [UWB_RSV_STATE_T_EXPANDING_DENIED] = true, + }; + + return has_two_drp_ies[rsv->state]; +} + /** * uwb_rsv_state_str - return a string for a reservation state * @state: the reservation state. @@ -66,7 +101,7 @@ const char *uwb_rsv_type_str(enum uwb_drp_type type) } EXPORT_SYMBOL_GPL(uwb_rsv_type_str); -static void uwb_rsv_dump(struct uwb_rsv *rsv) +void uwb_rsv_dump(char *text, struct uwb_rsv *rsv) { struct device *dev = &rsv->rc->uwb_dev.dev; struct uwb_dev_addr devaddr; @@ -82,6 +117,23 @@ static void uwb_rsv_dump(struct uwb_rsv *rsv) dev_dbg(dev, "rsv %s -> %s: %s\n", owner, target, uwb_rsv_state_str(rsv->state)); } +static void uwb_rsv_release(struct kref *kref) +{ + struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref); + + kfree(rsv); +} + +void uwb_rsv_get(struct uwb_rsv *rsv) +{ + kref_get(&rsv->kref); +} + +void uwb_rsv_put(struct uwb_rsv *rsv) +{ + kref_put(&rsv->kref, uwb_rsv_release); +} + /* * Get a free stream index for a reservation. * @@ -92,6 +144,7 @@ static void uwb_rsv_dump(struct uwb_rsv *rsv) static int uwb_rsv_get_stream(struct uwb_rsv *rsv) { struct uwb_rc *rc = rsv->rc; + struct device *dev = &rc->uwb_dev.dev; unsigned long *streams_bm; int stream; @@ -113,12 +166,15 @@ static int uwb_rsv_get_stream(struct uwb_rsv *rsv) rsv->stream = stream; set_bit(stream, streams_bm); + dev_dbg(dev, "get stream %d\n", rsv->stream); + return 0; } static void uwb_rsv_put_stream(struct uwb_rsv *rsv) { struct uwb_rc *rc = rsv->rc; + struct device *dev = &rc->uwb_dev.dev; unsigned long *streams_bm; switch (rsv->target.type) { @@ -133,86 +189,52 @@ static void uwb_rsv_put_stream(struct uwb_rsv *rsv) } clear_bit(rsv->stream, streams_bm); + + dev_dbg(dev, "put stream %d\n", rsv->stream); } -/* - * Generate a MAS allocation with a single row component. - */ -static void uwb_rsv_gen_alloc_row(struct uwb_mas_bm *mas, - int first_mas, int mas_per_zone, - int zs, int ze) +void uwb_rsv_backoff_win_timer(unsigned long arg) { - struct uwb_mas_bm col; - int z; - - bitmap_zero(mas->bm, UWB_NUM_MAS); - bitmap_zero(col.bm, UWB_NUM_MAS); - bitmap_fill(col.bm, mas_per_zone); - bitmap_shift_left(col.bm, col.bm, first_mas + zs * UWB_MAS_PER_ZONE, UWB_NUM_MAS); - - for (z = zs; z <= ze; z++) { - bitmap_or(mas->bm, mas->bm, col.bm, UWB_NUM_MAS); - bitmap_shift_left(col.bm, col.bm, UWB_MAS_PER_ZONE, UWB_NUM_MAS); + struct uwb_drp_backoff_win *bow = (struct uwb_drp_backoff_win *)arg; + struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow); + struct device *dev = &rc->uwb_dev.dev; + + bow->can_reserve_extra_mases = true; + if (bow->total_expired <= 4) { + bow->total_expired++; + } else { + /* after 4 backoff window has expired we can exit from + * the backoff procedure */ + bow->total_expired = 0; + bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1; } + dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n: ", bow->total_expired, bow->n); + + /* try to relocate all the "to be moved" relocations */ + uwb_rsv_handle_drp_avail_change(rc); } -/* - * Allocate some MAS for this reservation based on current local - * availability, the reservation parameters (max_mas, min_mas, - * sparsity), and the WiMedia rules for MAS allocations. - * - * Returns -EBUSY is insufficient free MAS are available. - * - * FIXME: to simplify this, only safe reservations with a single row - * component in zones 1 to 15 are tried (zone 0 is skipped to avoid - * problems with the MAS reserved for the BP). - * - * [ECMA-368] section B.2. - */ -static int uwb_rsv_alloc_mas(struct uwb_rsv *rsv) +void uwb_rsv_backoff_win_increment(struct uwb_rc *rc) { - static const int safe_mas_in_row[UWB_NUM_ZONES] = { - 8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1, - }; - int n, r; - struct uwb_mas_bm mas; - bool found = false; + struct uwb_drp_backoff_win *bow = &rc->bow; + struct device *dev = &rc->uwb_dev.dev; + unsigned timeout_us; - /* - * Search all valid safe allocations until either: too few MAS - * are available; or the smallest allocation with sufficient - * MAS is found. - * - * The top of the zones are preferred, so space for larger - * allocations is available in the bottom of the zone (e.g., a - * 15 MAS allocation should start in row 14 leaving space for - * a 120 MAS allocation at row 0). - */ - for (n = safe_mas_in_row[0]; n >= 1; n--) { - int num_mas; + dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window); - num_mas = n * (UWB_NUM_ZONES - 1); - if (num_mas < rsv->min_mas) - break; - if (found && num_mas < rsv->max_mas) - break; + bow->can_reserve_extra_mases = false; - for (r = UWB_MAS_PER_ZONE-1; r >= 0; r--) { - if (safe_mas_in_row[r] < n) - continue; - uwb_rsv_gen_alloc_row(&mas, r, n, 1, UWB_NUM_ZONES); - if (uwb_drp_avail_reserve_pending(rsv->rc, &mas) == 0) { - found = true; - break; - } - } - } + if((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX) + return; - if (!found) - return -EBUSY; + bow->window <<= 1; + bow->n = random32() & (bow->window - 1); + dev_dbg(dev, "new_window=%d, n=%d\n: ", bow->window, bow->n); - bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS); - return 0; + /* reset the timer associated variables */ + timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US; + bow->total_expired = 0; + mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us)); } static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv) @@ -225,13 +247,16 @@ static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv) * received. */ if (rsv->is_multicast) { - if (rsv->state == UWB_RSV_STATE_O_INITIATED) + if (rsv->state == UWB_RSV_STATE_O_INITIATED + || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING + || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING + || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) sframes = 1; if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED) sframes = 0; + } - rsv->expired = false; if (sframes > 0) { /* * Add an additional 2 superframes to account for the @@ -253,7 +278,7 @@ static void uwb_rsv_state_update(struct uwb_rsv *rsv, rsv->state = new_state; rsv->ie_valid = false; - uwb_rsv_dump(rsv); + uwb_rsv_dump("SU", rsv); uwb_rsv_stroke_timer(rsv); uwb_rsv_sched_update(rsv->rc); @@ -267,10 +292,17 @@ static void uwb_rsv_callback(struct uwb_rsv *rsv) void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state) { + struct uwb_rsv_move *mv = &rsv->mv; + if (rsv->state == new_state) { switch (rsv->state) { case UWB_RSV_STATE_O_ESTABLISHED: + case UWB_RSV_STATE_O_MOVE_EXPANDING: + case UWB_RSV_STATE_O_MOVE_COMBINING: + case UWB_RSV_STATE_O_MOVE_REDUCING: case UWB_RSV_STATE_T_ACCEPTED: + case UWB_RSV_STATE_T_EXPANDING_ACCEPTED: + case UWB_RSV_STATE_T_RESIZED: case UWB_RSV_STATE_NONE: uwb_rsv_stroke_timer(rsv); break; @@ -282,10 +314,10 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state) return; } + uwb_rsv_dump("SC", rsv); + switch (new_state) { case UWB_RSV_STATE_NONE: - uwb_drp_avail_release(rsv->rc, &rsv->mas); - uwb_rsv_put_stream(rsv); uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE); uwb_rsv_callback(rsv); break; @@ -295,12 +327,45 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state) case UWB_RSV_STATE_O_PENDING: uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING); break; + case UWB_RSV_STATE_O_MODIFIED: + /* in the companion there are the MASes to drop */ + bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS); + uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED); + break; case UWB_RSV_STATE_O_ESTABLISHED: + if (rsv->state == UWB_RSV_STATE_O_MODIFIED + || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) { + uwb_drp_avail_release(rsv->rc, &mv->companion_mas); + rsv->needs_release_companion_mas = false; + } uwb_drp_avail_reserve(rsv->rc, &rsv->mas); uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED); uwb_rsv_callback(rsv); break; + case UWB_RSV_STATE_O_MOVE_EXPANDING: + rsv->needs_release_companion_mas = true; + uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING); + break; + case UWB_RSV_STATE_O_MOVE_COMBINING: + rsv->needs_release_companion_mas = false; + uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas); + bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS); + rsv->mas.safe += mv->companion_mas.safe; + rsv->mas.unsafe += mv->companion_mas.unsafe; + uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING); + break; + case UWB_RSV_STATE_O_MOVE_REDUCING: + bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS); + rsv->needs_release_companion_mas = true; + rsv->mas.safe = mv->final_mas.safe; + rsv->mas.unsafe = mv->final_mas.unsafe; + bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS); + bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS); + uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING); + break; case UWB_RSV_STATE_T_ACCEPTED: + case UWB_RSV_STATE_T_RESIZED: + rsv->needs_release_companion_mas = false; uwb_drp_avail_reserve(rsv->rc, &rsv->mas); uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED); uwb_rsv_callback(rsv); @@ -308,12 +373,82 @@ void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state) case UWB_RSV_STATE_T_DENIED: uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED); break; + case UWB_RSV_STATE_T_CONFLICT: + uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT); + break; + case UWB_RSV_STATE_T_PENDING: + uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING); + break; + case UWB_RSV_STATE_T_EXPANDING_ACCEPTED: + rsv->needs_release_companion_mas = true; + uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas); + uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED); + break; default: dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n", uwb_rsv_state_str(new_state), new_state); } } +static void uwb_rsv_handle_timeout_work(struct work_struct *work) +{ + struct uwb_rsv *rsv = container_of(work, struct uwb_rsv, + handle_timeout_work); + struct uwb_rc *rc = rsv->rc; + + mutex_lock(&rc->rsvs_mutex); + + uwb_rsv_dump("TO", rsv); + + switch (rsv->state) { + case UWB_RSV_STATE_O_INITIATED: + if (rsv->is_multicast) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + goto unlock; + } + break; + case UWB_RSV_STATE_O_MOVE_EXPANDING: + if (rsv->is_multicast) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING); + goto unlock; + } + break; + case UWB_RSV_STATE_O_MOVE_COMBINING: + if (rsv->is_multicast) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING); + goto unlock; + } + break; + case UWB_RSV_STATE_O_MOVE_REDUCING: + if (rsv->is_multicast) { + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED); + goto unlock; + } + break; + case UWB_RSV_STATE_O_ESTABLISHED: + if (rsv->is_multicast) + goto unlock; + break; + case UWB_RSV_STATE_T_EXPANDING_ACCEPTED: + /* + * The time out could be for the main or of the + * companion DRP, assume it's for the companion and + * drop that first. A further time out is required to + * drop the main. + */ + uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED); + uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas); + goto unlock; + default: + break; + } + + uwb_rsv_remove(rsv); + +unlock: + mutex_unlock(&rc->rsvs_mutex); +} + static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc) { struct uwb_rsv *rsv; @@ -324,23 +459,17 @@ static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc) INIT_LIST_HEAD(&rsv->rc_node); INIT_LIST_HEAD(&rsv->pal_node); + kref_init(&rsv->kref); init_timer(&rsv->timer); rsv->timer.function = uwb_rsv_timer; rsv->timer.data = (unsigned long)rsv; rsv->rc = rc; + INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work); return rsv; } -static void uwb_rsv_free(struct uwb_rsv *rsv) -{ - uwb_dev_put(rsv->owner); - if (rsv->target.type == UWB_RSV_TARGET_DEV) - uwb_dev_put(rsv->target.dev); - kfree(rsv); -} - /** * uwb_rsv_create - allocate and initialize a UWB reservation structure * @rc: the radio controller @@ -371,26 +500,36 @@ EXPORT_SYMBOL_GPL(uwb_rsv_create); void uwb_rsv_remove(struct uwb_rsv *rsv) { + uwb_rsv_dump("RM", rsv); + if (rsv->state != UWB_RSV_STATE_NONE) uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); + + if (rsv->needs_release_companion_mas) + uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas); + uwb_drp_avail_release(rsv->rc, &rsv->mas); + + if (uwb_rsv_is_owner(rsv)) + uwb_rsv_put_stream(rsv); + del_timer_sync(&rsv->timer); - list_del(&rsv->rc_node); - uwb_rsv_free(rsv); + uwb_dev_put(rsv->owner); + if (rsv->target.type == UWB_RSV_TARGET_DEV) + uwb_dev_put(rsv->target.dev); + + list_del_init(&rsv->rc_node); + uwb_rsv_put(rsv); } /** * uwb_rsv_destroy - free a UWB reservation structure * @rsv: the reservation to free * - * The reservation will be terminated if it is pending or established. + * The reservation must already be terminated. */ void uwb_rsv_destroy(struct uwb_rsv *rsv) { - struct uwb_rc *rc = rsv->rc; - - mutex_lock(&rc->rsvs_mutex); - uwb_rsv_remove(rsv); - mutex_unlock(&rc->rsvs_mutex); + uwb_rsv_put(rsv); } EXPORT_SYMBOL_GPL(uwb_rsv_destroy); @@ -399,7 +538,7 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy); * @rsv: the reservation * * The PAL should fill in @rsv's owner, target, type, max_mas, - * min_mas, sparsity and is_multicast fields. If the target is a + * min_mas, max_interval and is_multicast fields. If the target is a * uwb_dev it must be referenced. * * The reservation's callback will be called when the reservation is @@ -408,20 +547,32 @@ EXPORT_SYMBOL_GPL(uwb_rsv_destroy); int uwb_rsv_establish(struct uwb_rsv *rsv) { struct uwb_rc *rc = rsv->rc; + struct uwb_mas_bm available; int ret; mutex_lock(&rc->rsvs_mutex); - ret = uwb_rsv_get_stream(rsv); if (ret) goto out; - ret = uwb_rsv_alloc_mas(rsv); - if (ret) { + rsv->tiebreaker = random32() & 1; + /* get available mas bitmap */ + uwb_drp_available(rc, &available); + + ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas); + if (ret == UWB_RSV_ALLOC_NOT_FOUND) { + ret = -EBUSY; + uwb_rsv_put_stream(rsv); + goto out; + } + + ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas); + if (ret != 0) { uwb_rsv_put_stream(rsv); goto out; } + uwb_rsv_get(rsv); list_add_tail(&rsv->rc_node, &rc->reservations); rsv->owner = &rc->uwb_dev; uwb_dev_get(rsv->owner); @@ -437,16 +588,71 @@ EXPORT_SYMBOL_GPL(uwb_rsv_establish); * @rsv: the reservation to modify * @max_mas: new maximum MAS to reserve * @min_mas: new minimum MAS to reserve - * @sparsity: new sparsity to use + * @max_interval: new max_interval to use * * FIXME: implement this once there are PALs that use it. */ -int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int sparsity) +int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval) { return -ENOSYS; } EXPORT_SYMBOL_GPL(uwb_rsv_modify); +/* + * move an already established reservation (rc->rsvs_mutex must to be + * taken when tis function is called) + */ +int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available) +{ + struct uwb_rc *rc = rsv->rc; + struct uwb_drp_backoff_win *bow = &rc->bow; + struct device *dev = &rc->uwb_dev.dev; + struct uwb_rsv_move *mv; + int ret = 0; + + if (bow->can_reserve_extra_mases == false) + return -EBUSY; + + mv = &rsv->mv; + + if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) { + + if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) { + /* We want to move the reservation */ + bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS); + uwb_drp_avail_reserve_pending(rc, &mv->companion_mas); + uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING); + } + } else { + dev_dbg(dev, "new allocation not found\n"); + } + + return ret; +} + +/* It will try to move every reservation in state O_ESTABLISHED giving + * to the MAS allocator algorithm an availability that is the real one + * plus the allocation already established from the reservation. */ +void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc) +{ + struct uwb_drp_backoff_win *bow = &rc->bow; + struct uwb_rsv *rsv; + struct uwb_mas_bm mas; + + if (bow->can_reserve_extra_mases == false) + return; + + list_for_each_entry(rsv, &rc->reservations, rc_node) { + if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED || + rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) { + uwb_drp_available(rc, &mas); + bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS); + uwb_rsv_try_move(rsv, &mas); + } + } + +} + /** * uwb_rsv_terminate - terminate an established reservation * @rsv: the reservation to terminate @@ -463,7 +669,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv) mutex_lock(&rc->rsvs_mutex); - uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); + if (rsv->state != UWB_RSV_STATE_NONE) + uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE); mutex_unlock(&rc->rsvs_mutex); } @@ -477,9 +684,14 @@ EXPORT_SYMBOL_GPL(uwb_rsv_terminate); * * Reservation requests from peers are denied unless a PAL accepts it * by calling this function. + * + * The PAL call uwb_rsv_destroy() for all accepted reservations before + * calling uwb_pal_unregister(). */ void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv) { + uwb_rsv_get(rsv); + rsv->callback = cb; rsv->pal_priv = pal_priv; rsv->state = UWB_RSV_STATE_T_ACCEPTED; @@ -530,9 +742,9 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc, uwb_dev_get(rsv->owner); rsv->target.type = UWB_RSV_TARGET_DEV; rsv->target.dev = &rc->uwb_dev; + uwb_dev_get(&rc->uwb_dev); rsv->type = uwb_ie_drp_type(drp_ie); rsv->stream = uwb_ie_drp_stream_index(drp_ie); - set_bit(rsv->stream, rsv->owner->streams); uwb_drp_ie_to_bm(&rsv->mas, drp_ie); /* @@ -540,24 +752,46 @@ static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc, * deny the request. */ rsv->state = UWB_RSV_STATE_T_DENIED; - spin_lock(&rc->pal_lock); + mutex_lock(&rc->uwb_dev.mutex); list_for_each_entry(pal, &rc->pals, node) { if (pal->new_rsv) - pal->new_rsv(rsv); + pal->new_rsv(pal, rsv); if (rsv->state == UWB_RSV_STATE_T_ACCEPTED) break; } - spin_unlock(&rc->pal_lock); + mutex_unlock(&rc->uwb_dev.mutex); list_add_tail(&rsv->rc_node, &rc->reservations); state = rsv->state; rsv->state = UWB_RSV_STATE_NONE; - uwb_rsv_set_state(rsv, state); + + /* FIXME: do something sensible here */ + if (state == UWB_RSV_STATE_T_ACCEPTED + && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) { + /* FIXME: do something sensible here */ + } else { + uwb_rsv_set_state(rsv, state); + } return rsv; } /** + * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservations + * @rsv: the reservation. + * @mas: returns the available MAS. + * + * The usable MAS of a reservation may be less than the negotiated MAS + * if alien BPs are present. + */ +void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas) +{ + bitmap_zero(mas->bm, UWB_NUM_MAS); + bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS); +} +EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas); + +/** * uwb_rsv_find - find a reservation for a received DRP IE. * @rc: the radio controller * @src: source of the DRP IE @@ -596,8 +830,6 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc) bool ie_updated = false; list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) { - if (rsv->expired) - uwb_drp_handle_timeout(rsv); if (!rsv->ie_valid) { uwb_drp_ie_update(rsv); ie_updated = true; @@ -607,9 +839,47 @@ static bool uwb_rsv_update_all(struct uwb_rc *rc) return ie_updated; } +void uwb_rsv_queue_update(struct uwb_rc *rc) +{ + unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE; + + queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us)); +} + +/** + * uwb_rsv_sched_update - schedule an update of the DRP IEs + * @rc: the radio controller. + * + * To improve performance and ensure correctness with [ECMA-368] the + * number of SET-DRP-IE commands that are done are limited. + * + * DRP IEs update come from two sources: DRP events from the hardware + * which all occur at the beginning of the superframe ('syncronous' + * events) and reservation establishment/termination requests from + * PALs or timers ('asynchronous' events). + * + * A delayed work ensures that all the synchronous events result in + * one SET-DRP-IE command. + * + * Additional logic (the set_drp_ie_pending and rsv_updated_postponed + * flags) will prevent an asynchrous event starting a SET-DRP-IE + * command if one is currently awaiting a response. + * + * FIXME: this does leave a window where an asynchrous event can delay + * the SET-DRP-IE for a synchronous event by one superframe. + */ void uwb_rsv_sched_update(struct uwb_rc *rc) { - queue_work(rc->rsv_workq, &rc->rsv_update_work); + spin_lock(&rc->rsvs_lock); + if (!delayed_work_pending(&rc->rsv_update_work)) { + if (rc->set_drp_ie_pending > 0) { + rc->set_drp_ie_pending++; + goto unlock; + } + uwb_rsv_queue_update(rc); + } +unlock: + spin_unlock(&rc->rsvs_lock); } /* @@ -618,7 +888,8 @@ void uwb_rsv_sched_update(struct uwb_rc *rc) */ static void uwb_rsv_update_work(struct work_struct *work) { - struct uwb_rc *rc = container_of(work, struct uwb_rc, rsv_update_work); + struct uwb_rc *rc = container_of(work, struct uwb_rc, + rsv_update_work.work); bool ie_updated; mutex_lock(&rc->rsvs_mutex); @@ -630,25 +901,71 @@ static void uwb_rsv_update_work(struct work_struct *work) ie_updated = true; } - if (ie_updated) + if (ie_updated && (rc->set_drp_ie_pending == 0)) uwb_rc_send_all_drp_ie(rc); mutex_unlock(&rc->rsvs_mutex); } +static void uwb_rsv_alien_bp_work(struct work_struct *work) +{ + struct uwb_rc *rc = container_of(work, struct uwb_rc, + rsv_alien_bp_work.work); + struct uwb_rsv *rsv; + + mutex_lock(&rc->rsvs_mutex); + + list_for_each_entry(rsv, &rc->reservations, rc_node) { + if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) { + rsv->callback(rsv); + } + } + + mutex_unlock(&rc->rsvs_mutex); +} + static void uwb_rsv_timer(unsigned long arg) { struct uwb_rsv *rsv = (struct uwb_rsv *)arg; - rsv->expired = true; - uwb_rsv_sched_update(rsv->rc); + queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work); +} + +/** + * uwb_rsv_remove_all - remove all reservations + * @rc: the radio controller + * + * A DRP IE update is not done. + */ +void uwb_rsv_remove_all(struct uwb_rc *rc) +{ + struct uwb_rsv *rsv, *t; + + mutex_lock(&rc->rsvs_mutex); + list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) { + uwb_rsv_remove(rsv); + } + /* Cancel any postponed update. */ + rc->set_drp_ie_pending = 0; + mutex_unlock(&rc->rsvs_mutex); + + cancel_delayed_work_sync(&rc->rsv_update_work); } void uwb_rsv_init(struct uwb_rc *rc) { INIT_LIST_HEAD(&rc->reservations); + INIT_LIST_HEAD(&rc->cnflt_alien_list); mutex_init(&rc->rsvs_mutex); - INIT_WORK(&rc->rsv_update_work, uwb_rsv_update_work); + spin_lock_init(&rc->rsvs_lock); + INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work); + INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work); + rc->bow.can_reserve_extra_mases = true; + rc->bow.total_expired = 0; + rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1; + init_timer(&rc->bow.timer); + rc->bow.timer.function = uwb_rsv_backoff_win_timer; + rc->bow.timer.data = (unsigned long)&rc->bow; bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS); } @@ -667,14 +984,6 @@ int uwb_rsv_setup(struct uwb_rc *rc) void uwb_rsv_cleanup(struct uwb_rc *rc) { - struct uwb_rsv *rsv, *t; - - mutex_lock(&rc->rsvs_mutex); - list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) { - uwb_rsv_remove(rsv); - } - mutex_unlock(&rc->rsvs_mutex); - - cancel_work_sync(&rc->rsv_update_work); + uwb_rsv_remove_all(rc); destroy_workqueue(rc->rsv_workq); } diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 2d8d62d9f53e..5ad36164c13b 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -11,23 +11,48 @@ #include <linux/uwb/umc.h> #include <linux/pci.h> -static int umc_bus_unbind_helper(struct device *dev, void *data) +static int umc_bus_pre_reset_helper(struct device *dev, void *data) { - struct device *parent = data; + int ret = 0; - if (dev->parent == parent && dev->driver) - device_release_driver(dev); - return 0; + if (dev->driver) { + struct umc_dev *umc = to_umc_dev(dev); + struct umc_driver *umc_drv = to_umc_driver(dev->driver); + + if (umc_drv->pre_reset) + ret = umc_drv->pre_reset(umc); + else + device_release_driver(dev); + } + return ret; +} + +static int umc_bus_post_reset_helper(struct device *dev, void *data) +{ + int ret = 0; + + if (dev->driver) { + struct umc_dev *umc = to_umc_dev(dev); + struct umc_driver *umc_drv = to_umc_driver(dev->driver); + + if (umc_drv->post_reset) + ret = umc_drv->post_reset(umc); + } else + ret = device_attach(dev); + + return ret; } /** * umc_controller_reset - reset the whole UMC controller * @umc: the UMC device for the radio controller. * - * Drivers will be unbound from all UMC devices belonging to the - * controller and then the radio controller will be rebound. The - * radio controller is expected to do a full hardware reset when it is - * probed. + * Drivers or all capabilities of the controller will have their + * pre_reset methods called or be unbound from their device. Then all + * post_reset methods will be called or the drivers will be rebound. + * + * Radio controllers must provide pre_reset and post_reset methods and + * reset the hardware in their start method. * * If this is called while a probe() or remove() is in progress it * will return -EAGAIN and not perform the reset. @@ -35,14 +60,13 @@ static int umc_bus_unbind_helper(struct device *dev, void *data) int umc_controller_reset(struct umc_dev *umc) { struct device *parent = umc->dev.parent; - int ret; + int ret = 0; - if (down_trylock(&parent->sem)) + if(down_trylock(&parent->sem)) return -EAGAIN; - bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper); - ret = device_attach(&umc->dev); - if (ret == 1) - ret = 0; + ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); + if (ret >= 0) + device_for_each_child(parent, parent, umc_bus_post_reset_helper); up(&parent->sem); return ret; @@ -75,10 +99,10 @@ static int umc_bus_rescan_helper(struct device *dev, void *data) if (!dev->driver) ret = device_attach(dev); - return ret < 0 ? ret : 0; + return ret; } -static void umc_bus_rescan(void) +static void umc_bus_rescan(struct device *parent) { int err; @@ -86,7 +110,7 @@ static void umc_bus_rescan(void) * We can't use bus_rescan_devices() here as it deadlocks when * it tries to retake the dev->parent semaphore. */ - err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper); + err = device_for_each_child(parent, NULL, umc_bus_rescan_helper); if (err < 0) printk(KERN_WARNING "%s: rescan of bus failed: %d\n", KBUILD_MODNAME, err); @@ -120,7 +144,7 @@ static int umc_device_probe(struct device *dev) if (err) put_device(dev); else - umc_bus_rescan(); + umc_bus_rescan(dev->parent); return err; } diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c index aa44e1c1a102..1fc7d8270bb8 100644 --- a/drivers/uwb/umc-dev.c +++ b/drivers/uwb/umc-dev.c @@ -7,8 +7,6 @@ */ #include <linux/kernel.h> #include <linux/uwb/umc.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> static void umc_device_release(struct device *dev) { @@ -31,8 +29,7 @@ struct umc_dev *umc_device_create(struct device *parent, int n) umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL); if (umc) { - snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d", - parent->bus_id, n); + dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n); umc->dev.parent = parent; umc->dev.bus = &umc_bus_type; umc->dev.release = umc_device_release; @@ -54,8 +51,6 @@ int umc_device_register(struct umc_dev *umc) { int err; - d_fnstart(3, &umc->dev, "(umc_dev %p)\n", umc); - err = request_resource(umc->resource.parent, &umc->resource); if (err < 0) { dev_err(&umc->dev, "can't allocate resource range " @@ -69,13 +64,11 @@ int umc_device_register(struct umc_dev *umc) err = device_register(&umc->dev); if (err < 0) goto error_device_register; - d_fnend(3, &umc->dev, "(umc_dev %p) = 0\n", umc); return 0; error_device_register: release_resource(&umc->resource); error_request_resource: - d_fnend(3, &umc->dev, "(umc_dev %p) = %d\n", umc, err); return err; } EXPORT_SYMBOL_GPL(umc_device_register); @@ -95,10 +88,8 @@ void umc_device_unregister(struct umc_dev *umc) if (!umc) return; dev = get_device(&umc->dev); - d_fnstart(3, dev, "(umc_dev %p)\n", umc); device_unregister(&umc->dev); release_resource(&umc->resource); - d_fnend(3, dev, "(umc_dev %p) = void\n", umc); put_device(dev); } EXPORT_SYMBOL_GPL(umc_device_unregister); diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c index 6d232c35d07d..4a42993700c1 100644 --- a/drivers/uwb/uwb-debug.c +++ b/drivers/uwb/uwb-debug.c @@ -4,6 +4,7 @@ * * Copyright (C) 2005-2006 Intel Corporation * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version @@ -33,31 +34,9 @@ #include <linux/seq_file.h> #include <linux/uwb/debug-cmd.h> -#define D_LOCAL 0 -#include <linux/uwb/debug.h> #include "uwb-internal.h" -void dump_bytes(struct device *dev, const void *_buf, size_t rsize) -{ - const char *buf = _buf; - char line[32]; - size_t offset = 0; - int cnt, cnt2; - for (cnt = 0; cnt < rsize; cnt += 8) { - size_t rtop = rsize - cnt < 8 ? rsize - cnt : 8; - for (offset = cnt2 = 0; cnt2 < rtop; cnt2++) { - offset += scnprintf(line + offset, sizeof(line) - offset, - "%02x ", buf[cnt + cnt2] & 0xff); - } - if (dev) - dev_info(dev, "%s\n", line); - else - printk(KERN_INFO "%s\n", line); - } -} -EXPORT_SYMBOL_GPL(dump_bytes); - /* * Debug interface * @@ -84,26 +63,23 @@ struct uwb_dbg { struct dentry *reservations_f; struct dentry *accept_f; struct dentry *drp_avail_f; + spinlock_t list_lock; }; static struct dentry *root_dir; static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv) { - struct uwb_rc *rc = rsv->rc; - struct device *dev = &rc->uwb_dev.dev; - struct uwb_dev_addr devaddr; - char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE]; - - uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr); - if (rsv->target.type == UWB_RSV_TARGET_DEV) - devaddr = rsv->target.dev->dev_addr; - else - devaddr = rsv->target.devaddr; - uwb_dev_addr_print(target, sizeof(target), &devaddr); + struct uwb_dbg *dbg = rsv->pal_priv; - dev_dbg(dev, "debug: rsv %s -> %s: %s\n", - owner, target, uwb_rsv_state_str(rsv->state)); + uwb_rsv_dump("debug", rsv); + + if (rsv->state == UWB_RSV_STATE_NONE) { + spin_lock(&dbg->list_lock); + list_del(&rsv->pal_node); + spin_unlock(&dbg->list_lock); + uwb_rsv_destroy(rsv); + } } static int cmd_rsv_establish(struct uwb_rc *rc, @@ -119,26 +95,27 @@ static int cmd_rsv_establish(struct uwb_rc *rc, if (target == NULL) return -ENODEV; - rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, NULL); + rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, rc->dbg); if (rsv == NULL) { uwb_dev_put(target); return -ENOMEM; } - rsv->owner = &rc->uwb_dev; - rsv->target.type = UWB_RSV_TARGET_DEV; - rsv->target.dev = target; - rsv->type = cmd->type; - rsv->max_mas = cmd->max_mas; - rsv->min_mas = cmd->min_mas; - rsv->sparsity = cmd->sparsity; + rsv->target.type = UWB_RSV_TARGET_DEV; + rsv->target.dev = target; + rsv->type = cmd->type; + rsv->max_mas = cmd->max_mas; + rsv->min_mas = cmd->min_mas; + rsv->max_interval = cmd->max_interval; ret = uwb_rsv_establish(rsv); if (ret) uwb_rsv_destroy(rsv); - else + else { + spin_lock(&(rc->dbg)->list_lock); list_add_tail(&rsv->pal_node, &rc->dbg->rsvs); - + spin_unlock(&(rc->dbg)->list_lock); + } return ret; } @@ -148,21 +125,40 @@ static int cmd_rsv_terminate(struct uwb_rc *rc, struct uwb_rsv *rsv, *found = NULL; int i = 0; + spin_lock(&(rc->dbg)->list_lock); + list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) { if (i == cmd->index) { found = rsv; + uwb_rsv_get(found); break; } + i++; } + + spin_unlock(&(rc->dbg)->list_lock); + if (!found) return -EINVAL; - list_del(&found->pal_node); uwb_rsv_terminate(found); + uwb_rsv_put(found); return 0; } +static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add) +{ + return uwb_rc_ie_add(rc, + (const struct uwb_ie_hdr *) ie_to_add->data, + ie_to_add->len); +} + +static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm) +{ + return uwb_rc_ie_rm(rc, ie_to_rm->data[0]); +} + static int command_open(struct inode *inode, struct file *file) { file->private_data = inode->i_private; @@ -175,8 +171,8 @@ static ssize_t command_write(struct file *file, const char __user *buf, { struct uwb_rc *rc = file->private_data; struct uwb_dbg_cmd cmd; - int ret; - + int ret = 0; + if (len != sizeof(struct uwb_dbg_cmd)) return -EINVAL; @@ -190,6 +186,18 @@ static ssize_t command_write(struct file *file, const char __user *buf, case UWB_DBG_CMD_RSV_TERMINATE: ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate); break; + case UWB_DBG_CMD_IE_ADD: + ret = cmd_ie_add(rc, &cmd.ie_add); + break; + case UWB_DBG_CMD_IE_RM: + ret = cmd_ie_rm(rc, &cmd.ie_rm); + break; + case UWB_DBG_CMD_RADIO_START: + ret = uwb_radio_start(&rc->dbg->pal); + break; + case UWB_DBG_CMD_RADIO_STOP: + uwb_radio_stop(&rc->dbg->pal); + break; default: return -EINVAL; } @@ -283,12 +291,26 @@ static struct file_operations drp_avail_fops = { .owner = THIS_MODULE, }; -static void uwb_dbg_new_rsv(struct uwb_rsv *rsv) +static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel) +{ + struct device *dev = &pal->rc->uwb_dev.dev; + + if (channel > 0) + dev_info(dev, "debug: channel %d started\n", channel); + else + dev_info(dev, "debug: channel stopped\n"); +} + +static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv) { - struct uwb_rc *rc = rsv->rc; + struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal); - if (rc->dbg->accept) - uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, NULL); + if (dbg->accept) { + spin_lock(&dbg->list_lock); + list_add_tail(&rsv->pal_node, &dbg->rsvs); + spin_unlock(&dbg->list_lock); + uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg); + } } /** @@ -302,10 +324,14 @@ void uwb_dbg_add_rc(struct uwb_rc *rc) return; INIT_LIST_HEAD(&rc->dbg->rsvs); + spin_lock_init(&(rc->dbg)->list_lock); uwb_pal_init(&rc->dbg->pal); + rc->dbg->pal.rc = rc; + rc->dbg->pal.channel_changed = uwb_dbg_channel_changed; rc->dbg->pal.new_rsv = uwb_dbg_new_rsv; - uwb_pal_register(rc, &rc->dbg->pal); + uwb_pal_register(&rc->dbg->pal); + if (root_dir) { rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev), root_dir); @@ -325,7 +351,7 @@ void uwb_dbg_add_rc(struct uwb_rc *rc) } /** - * uwb_dbg_add_rc - remove a radio controller's debug interface + * uwb_dbg_del_rc - remove a radio controller's debug interface * @rc: the radio controller */ void uwb_dbg_del_rc(struct uwb_rc *rc) @@ -336,10 +362,10 @@ void uwb_dbg_del_rc(struct uwb_rc *rc) return; list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) { - uwb_rsv_destroy(rsv); + uwb_rsv_terminate(rsv); } - uwb_pal_unregister(rc, &rc->dbg->pal); + uwb_pal_unregister(&rc->dbg->pal); if (root_dir) { debugfs_remove(rc->dbg->drp_avail_f); @@ -365,3 +391,16 @@ void uwb_dbg_exit(void) { debugfs_remove(root_dir); } + +/** + * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL + * @pal: The PAL. + */ +struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal) +{ + struct uwb_rc *rc = pal->rc; + + if (root_dir && rc->dbg && rc->dbg->root_d && pal->name) + return debugfs_create_dir(pal->name, rc->dbg->root_d); + return NULL; +} diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h index 2ad307d12961..d5bcfc1c227a 100644 --- a/drivers/uwb/uwb-internal.h +++ b/drivers/uwb/uwb-internal.h @@ -66,14 +66,14 @@ extern int uwb_rc_scan(struct uwb_rc *rc, unsigned channel, enum uwb_scan_type type, unsigned bpst_offset); extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc); -extern ssize_t uwb_rc_print_IEs(struct uwb_rc *rc, char *, size_t); -extern void uwb_rc_ie_init(struct uwb_rc *); -extern void uwb_rc_ie_init(struct uwb_rc *); -extern ssize_t uwb_rc_ie_setup(struct uwb_rc *); -extern void uwb_rc_ie_release(struct uwb_rc *); -extern int uwb_rc_ie_add(struct uwb_rc *, - const struct uwb_ie_hdr *, size_t); -extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie); + +void uwb_rc_ie_init(struct uwb_rc *); +int uwb_rc_ie_setup(struct uwb_rc *); +void uwb_rc_ie_release(struct uwb_rc *); +int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len, + char *buf, size_t size); +int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *); + extern const char *uwb_rc_strerror(unsigned code); @@ -92,6 +92,12 @@ extern const char *uwb_rc_strerror(unsigned code); struct uwb_rc_neh; +extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + u8 expected_type, u16 expected_event, + uwb_rc_cmd_cb_f cb, void *arg); + + void uwb_rc_neh_create(struct uwb_rc *rc); void uwb_rc_neh_destroy(struct uwb_rc *rc); @@ -106,7 +112,69 @@ void uwb_rc_neh_put(struct uwb_rc_neh *neh); extern int uwb_est_create(void); extern void uwb_est_destroy(void); +/* + * UWB conflicting alien reservations + */ +struct uwb_cnflt_alien { + struct uwb_rc *rc; + struct list_head rc_node; + struct uwb_mas_bm mas; + struct timer_list timer; + struct work_struct cnflt_update_work; +}; + +enum uwb_uwb_rsv_alloc_result { + UWB_RSV_ALLOC_FOUND = 0, + UWB_RSV_ALLOC_NOT_FOUND, +}; + +enum uwb_rsv_mas_status { + UWB_RSV_MAS_NOT_AVAIL = 1, + UWB_RSV_MAS_SAFE, + UWB_RSV_MAS_UNSAFE, +}; + +struct uwb_rsv_col_set_info { + unsigned char start_col; + unsigned char interval; + unsigned char safe_mas_per_col; + unsigned char unsafe_mas_per_col; +}; + +struct uwb_rsv_col_info { + unsigned char max_avail_safe; + unsigned char max_avail_unsafe; + unsigned char highest_mas[UWB_MAS_PER_ZONE]; + struct uwb_rsv_col_set_info csi; +}; + +struct uwb_rsv_row_info { + unsigned char avail[UWB_MAS_PER_ZONE]; + unsigned char free_rows; + unsigned char used_rows; +}; + +/* + * UWB find allocation + */ +struct uwb_rsv_alloc_info { + unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES]; + struct uwb_rsv_col_info ci[UWB_NUM_ZONES]; + struct uwb_rsv_row_info ri; + struct uwb_mas_bm *not_available; + struct uwb_mas_bm *result; + int min_mas; + int max_mas; + int max_interval; + int total_allocated_mases; + int safe_allocated_mases; + int unsafe_allocated_mases; + int interval; +}; +int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, + struct uwb_mas_bm *result); +void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc); /* * UWB Events & management daemon */ @@ -160,13 +228,14 @@ struct uwb_event { }; }; -extern void uwbd_start(void); -extern void uwbd_stop(void); +extern void uwbd_start(struct uwb_rc *rc); +extern void uwbd_stop(struct uwb_rc *rc); extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask); extern void uwbd_event_queue(struct uwb_event *); void uwbd_flush(struct uwb_rc *rc); /* UWB event handlers */ +extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *); extern int uwbd_evt_handle_rc_beacon(struct uwb_event *); extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *); extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *); @@ -193,15 +262,6 @@ int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt); extern unsigned long beacon_timeout_ms; -/** Beacon cache list */ -struct uwb_beca { - struct list_head list; - size_t entries; - struct mutex mutex; -}; - -extern struct uwb_beca uwb_beca; - /** * Beacon cache entry * @@ -228,9 +288,6 @@ struct uwb_beca_e { struct uwb_beacon_frame; extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *, char *, size_t); -extern struct uwb_beca_e *__uwb_beca_add(struct uwb_rc_evt_beacon *, - struct uwb_beacon_frame *, - unsigned long); extern void uwb_bce_kfree(struct kref *_bce); static inline void uwb_bce_get(struct uwb_beca_e *bce) @@ -241,14 +298,19 @@ static inline void uwb_bce_put(struct uwb_beca_e *bce) { kref_put(&bce->refcnt, uwb_bce_kfree); } -extern void uwb_beca_purge(void); -extern void uwb_beca_release(void); +extern void uwb_beca_purge(struct uwb_rc *rc); +extern void uwb_beca_release(struct uwb_rc *rc); struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc, const struct uwb_dev_addr *devaddr); struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc, const struct uwb_mac_addr *macaddr); +int uwb_radio_setup(struct uwb_rc *rc); +void uwb_radio_reset_state(struct uwb_rc *rc); +void uwb_radio_shutdown(struct uwb_rc *rc); +int uwb_radio_force_channel(struct uwb_rc *rc, int channel); + /* -- UWB Sysfs representation */ extern struct class uwb_rc_class; extern struct device_attribute dev_attr_mac_address; @@ -259,18 +321,29 @@ extern struct device_attribute dev_attr_scan; void uwb_rsv_init(struct uwb_rc *rc); int uwb_rsv_setup(struct uwb_rc *rc); void uwb_rsv_cleanup(struct uwb_rc *rc); +void uwb_rsv_remove_all(struct uwb_rc *rc); +void uwb_rsv_get(struct uwb_rsv *rsv); +void uwb_rsv_put(struct uwb_rsv *rsv); +bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv); +void uwb_rsv_dump(char *text, struct uwb_rsv *rsv); +int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available); +void uwb_rsv_backoff_win_timer(unsigned long arg); +void uwb_rsv_backoff_win_increment(struct uwb_rc *rc); +int uwb_rsv_status(struct uwb_rsv *rsv); +int uwb_rsv_companion_status(struct uwb_rsv *rsv); void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state); void uwb_rsv_remove(struct uwb_rsv *rsv); struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src, struct uwb_ie_drp *drp_ie); void uwb_rsv_sched_update(struct uwb_rc *rc); +void uwb_rsv_queue_update(struct uwb_rc *rc); -void uwb_drp_handle_timeout(struct uwb_rsv *rsv); int uwb_drp_ie_update(struct uwb_rsv *rsv); void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie); void uwb_drp_avail_init(struct uwb_rc *rc); +void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail); int uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas); void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas); void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas); @@ -289,8 +362,7 @@ void uwb_dbg_init(void); void uwb_dbg_exit(void); void uwb_dbg_add_rc(struct uwb_rc *rc); void uwb_dbg_del_rc(struct uwb_rc *rc); - -/* Workarounds for version specific stuff */ +struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal); static inline void uwb_dev_lock(struct uwb_dev *uwb_dev) { diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c index 78908416e42c..57bd6bfef37e 100644 --- a/drivers/uwb/uwbd.c +++ b/drivers/uwb/uwbd.c @@ -68,17 +68,13 @@ * * Handler functions are called normally uwbd_evt_handle_*(). */ - #include <linux/kthread.h> #include <linux/module.h> #include <linux/freezer.h> -#include "uwb-internal.h" - -#define D_LOCAL 1 -#include <linux/uwb/debug.h> +#include "uwb-internal.h" -/** +/* * UWBD Event handler function signature * * Return !0 if the event needs not to be freed (ie the handler @@ -101,9 +97,12 @@ struct uwbd_event { const char *name; }; -/** Table of handlers for and properties of the UWBD Radio Control Events */ -static -struct uwbd_event uwbd_events[] = { +/* Table of handlers for and properties of the UWBD Radio Control Events */ +static struct uwbd_event uwbd_urc_events[] = { + [UWB_RC_EVT_IE_RCV] = { + .handler = uwbd_evt_handle_rc_ie_rcv, + .name = "IE_RECEIVED" + }, [UWB_RC_EVT_BEACON] = { .handler = uwbd_evt_handle_rc_beacon, .name = "BEACON_RECEIVED" @@ -142,23 +141,15 @@ struct uwbd_evt_type_handler { size_t size; }; -#define UWBD_EVT_TYPE_HANDLER(n,a) { \ - .name = (n), \ - .uwbd_events = (a), \ - .size = sizeof(a)/sizeof((a)[0]) \ -} - - -/** Table of handlers for each UWBD Event type. */ -static -struct uwbd_evt_type_handler uwbd_evt_type_handlers[] = { - [UWB_RC_CET_GENERAL] = UWBD_EVT_TYPE_HANDLER("RC", uwbd_events) +/* Table of handlers for each UWBD Event type. */ +static struct uwbd_evt_type_handler uwbd_urc_evt_type_handlers[] = { + [UWB_RC_CET_GENERAL] = { + .name = "URC", + .uwbd_events = uwbd_urc_events, + .size = ARRAY_SIZE(uwbd_urc_events), + }, }; -static const -size_t uwbd_evt_type_handlers_len = - sizeof(uwbd_evt_type_handlers) / sizeof(uwbd_evt_type_handlers[0]); - static const struct uwbd_event uwbd_message_handlers[] = { [UWB_EVT_MSG_RESET] = { .handler = uwbd_msg_handle_reset, @@ -166,9 +157,7 @@ static const struct uwbd_event uwbd_message_handlers[] = { }, }; -static DEFINE_MUTEX(uwbd_event_mutex); - -/** +/* * Handle an URC event passed to the UWB Daemon * * @evt: the event to handle @@ -188,6 +177,7 @@ static DEFINE_MUTEX(uwbd_event_mutex); static int uwbd_event_handle_urc(struct uwb_event *evt) { + int result = -EINVAL; struct uwbd_evt_type_handler *type_table; uwbd_evt_handler_f handler; u8 type, context; @@ -197,26 +187,24 @@ int uwbd_event_handle_urc(struct uwb_event *evt) event = le16_to_cpu(evt->notif.rceb->wEvent); context = evt->notif.rceb->bEventContext; - if (type > uwbd_evt_type_handlers_len) { - printk(KERN_ERR "UWBD: event type %u: unknown (too high)\n", type); - return -EINVAL; - } - type_table = &uwbd_evt_type_handlers[type]; - if (type_table->uwbd_events == NULL) { - printk(KERN_ERR "UWBD: event type %u: unknown\n", type); - return -EINVAL; - } - if (event > type_table->size) { - printk(KERN_ERR "UWBD: event %s[%u]: unknown (too high)\n", - type_table->name, event); - return -EINVAL; - } + if (type > ARRAY_SIZE(uwbd_urc_evt_type_handlers)) + goto out; + type_table = &uwbd_urc_evt_type_handlers[type]; + if (type_table->uwbd_events == NULL) + goto out; + if (event > type_table->size) + goto out; handler = type_table->uwbd_events[event].handler; - if (handler == NULL) { - printk(KERN_ERR "UWBD: event %s[%u]: unknown\n", type_table->name, event); - return -EINVAL; - } - return (*handler)(evt); + if (handler == NULL) + goto out; + + result = (*handler)(evt); +out: + if (result < 0) + dev_err(&evt->rc->uwb_dev.dev, + "UWBD: event 0x%02x/%04x/%02x, handling failed: %d\n", + type, event, context, result); + return result; } static void uwbd_event_handle_message(struct uwb_event *evt) @@ -231,19 +219,10 @@ static void uwbd_event_handle_message(struct uwb_event *evt) return; } - /* If this is a reset event we need to drop the - * uwbd_event_mutex or it deadlocks when the reset handler - * attempts to flush the uwbd events. */ - if (evt->message == UWB_EVT_MSG_RESET) - mutex_unlock(&uwbd_event_mutex); - result = uwbd_message_handlers[evt->message].handler(evt); if (result < 0) dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n", uwbd_message_handlers[evt->message].name, result); - - if (evt->message == UWB_EVT_MSG_RESET) - mutex_lock(&uwbd_event_mutex); } static void uwbd_event_handle(struct uwb_event *evt) @@ -271,20 +250,6 @@ static void uwbd_event_handle(struct uwb_event *evt) __uwb_rc_put(rc); /* for the __uwb_rc_get() in uwb_rc_notif_cb() */ } -/* The UWB Daemon */ - - -/** Daemon's PID: used to decide if we can queue or not */ -static int uwbd_pid; -/** Daemon's task struct for managing the kthread */ -static struct task_struct *uwbd_task; -/** Daemon's waitqueue for waiting for new events */ -static DECLARE_WAIT_QUEUE_HEAD(uwbd_wq); -/** Daemon's list of events; we queue/dequeue here */ -static struct list_head uwbd_event_list = LIST_HEAD_INIT(uwbd_event_list); -/** Daemon's list lock to protect concurent access */ -static DEFINE_SPINLOCK(uwbd_event_list_lock); - /** * UWB Daemon @@ -298,65 +263,58 @@ static DEFINE_SPINLOCK(uwbd_event_list_lock); * FIXME: should change so we don't have a 1HZ timer all the time, but * only if there are devices. */ -static int uwbd(void *unused) +static int uwbd(void *param) { + struct uwb_rc *rc = param; unsigned long flags; - struct list_head list = LIST_HEAD_INIT(list); - struct uwb_event *evt, *nxt; + struct uwb_event *evt; int should_stop = 0; + while (1) { wait_event_interruptible_timeout( - uwbd_wq, - !list_empty(&uwbd_event_list) + rc->uwbd.wq, + !list_empty(&rc->uwbd.event_list) || (should_stop = kthread_should_stop()), HZ); if (should_stop) break; try_to_freeze(); - mutex_lock(&uwbd_event_mutex); - spin_lock_irqsave(&uwbd_event_list_lock, flags); - list_splice_init(&uwbd_event_list, &list); - spin_unlock_irqrestore(&uwbd_event_list_lock, flags); - list_for_each_entry_safe(evt, nxt, &list, list_node) { + spin_lock_irqsave(&rc->uwbd.event_list_lock, flags); + if (!list_empty(&rc->uwbd.event_list)) { + evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node); list_del(&evt->list_node); + } else + evt = NULL; + spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags); + + if (evt) { uwbd_event_handle(evt); kfree(evt); } - mutex_unlock(&uwbd_event_mutex); - uwb_beca_purge(); /* Purge devices that left */ + uwb_beca_purge(rc); /* Purge devices that left */ } return 0; } /** Start the UWB daemon */ -void uwbd_start(void) +void uwbd_start(struct uwb_rc *rc) { - uwbd_task = kthread_run(uwbd, NULL, "uwbd"); - if (uwbd_task == NULL) + rc->uwbd.task = kthread_run(uwbd, rc, "uwbd"); + if (rc->uwbd.task == NULL) printk(KERN_ERR "UWB: Cannot start management daemon; " "UWB won't work\n"); else - uwbd_pid = uwbd_task->pid; + rc->uwbd.pid = rc->uwbd.task->pid; } /* Stop the UWB daemon and free any unprocessed events */ -void uwbd_stop(void) +void uwbd_stop(struct uwb_rc *rc) { - unsigned long flags; - struct uwb_event *evt, *nxt; - kthread_stop(uwbd_task); - spin_lock_irqsave(&uwbd_event_list_lock, flags); - uwbd_pid = 0; - list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) { - if (evt->type == UWB_EVT_TYPE_NOTIF) - kfree(evt->notif.rceb); - kfree(evt); - } - spin_unlock_irqrestore(&uwbd_event_list_lock, flags); - uwb_beca_release(); + kthread_stop(rc->uwbd.task); + uwbd_flush(rc); } /* @@ -373,18 +331,20 @@ void uwbd_stop(void) */ void uwbd_event_queue(struct uwb_event *evt) { + struct uwb_rc *rc = evt->rc; unsigned long flags; - spin_lock_irqsave(&uwbd_event_list_lock, flags); - if (uwbd_pid != 0) { - list_add(&evt->list_node, &uwbd_event_list); - wake_up_all(&uwbd_wq); + + spin_lock_irqsave(&rc->uwbd.event_list_lock, flags); + if (rc->uwbd.pid != 0) { + list_add(&evt->list_node, &rc->uwbd.event_list); + wake_up_all(&rc->uwbd.wq); } else { __uwb_rc_put(evt->rc); if (evt->type == UWB_EVT_TYPE_NOTIF) kfree(evt->notif.rceb); kfree(evt); } - spin_unlock_irqrestore(&uwbd_event_list_lock, flags); + spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags); return; } @@ -392,10 +352,8 @@ void uwbd_flush(struct uwb_rc *rc) { struct uwb_event *evt, *nxt; - mutex_lock(&uwbd_event_mutex); - - spin_lock_irq(&uwbd_event_list_lock); - list_for_each_entry_safe(evt, nxt, &uwbd_event_list, list_node) { + spin_lock_irq(&rc->uwbd.event_list_lock); + list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) { if (evt->rc == rc) { __uwb_rc_put(rc); list_del(&evt->list_node); @@ -404,7 +362,5 @@ void uwbd_flush(struct uwb_rc *rc) kfree(evt); } } - spin_unlock_irq(&uwbd_event_list_lock); - - mutex_unlock(&uwbd_event_mutex); + spin_unlock_irq(&rc->uwbd.event_list_lock); } diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 1711deadb114..19a1dd129212 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -39,7 +39,6 @@ * them to the hw and transfer the replies/notifications back to the * UWB stack through the UWB daemon (UWBD). */ -#include <linux/version.h> #include <linux/init.h> #include <linux/module.h> #include <linux/pci.h> @@ -49,10 +48,8 @@ #include <linux/uwb.h> #include <linux/uwb/whci.h> #include <linux/uwb/umc.h> -#include "uwb-internal.h" -#define D_LOCAL 0 -#include <linux/uwb/debug.h> +#include "uwb-internal.h" /** * Descriptor for an instance of the UWB Radio Control Driver that @@ -98,13 +95,8 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc, struct device *dev = &whcrc->umc_dev->dev; u32 urccmd; - d_fnstart(3, dev, "(%p, %p, %zu)\n", uwb_rc, cmd, cmd_size); - might_sleep(); - - if (cmd_size >= 4096) { - result = -E2BIG; - goto error; - } + if (cmd_size >= 4096) + return -EINVAL; /* * If the URC is halted, then the hardware has reset itself. @@ -115,16 +107,14 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc, if (le_readl(whcrc->rc_base + URCSTS) & URCSTS_HALTED) { dev_err(dev, "requesting reset of halted radio controller\n"); uwb_rc_reset_all(uwb_rc); - result = -EIO; - goto error; + return -EIO; } result = wait_event_timeout(whcrc->cmd_wq, !(le_readl(whcrc->rc_base + URCCMD) & URCCMD_ACTIVE), HZ/2); if (result == 0) { dev_err(dev, "device is not ready to execute commands\n"); - result = -ETIMEDOUT; - goto error; + return -ETIMEDOUT; } memmove(whcrc->cmd_buf, cmd, cmd_size); @@ -137,10 +127,7 @@ static int whcrc_cmd(struct uwb_rc *uwb_rc, whcrc->rc_base + URCCMD); spin_unlock(&whcrc->irq_lock); -error: - d_fnend(3, dev, "(%p, %p, %zu) = %d\n", - uwb_rc, cmd, cmd_size, result); - return result; + return 0; } static int whcrc_reset(struct uwb_rc *rc) @@ -167,34 +154,25 @@ static int whcrc_reset(struct uwb_rc *rc) static void whcrc_enable_events(struct whcrc *whcrc) { - struct device *dev = &whcrc->umc_dev->dev; u32 urccmd; - d_fnstart(4, dev, "(whcrc %p)\n", whcrc); - le_writeq(whcrc->evt_dma_buf, whcrc->rc_base + URCEVTADDR); spin_lock(&whcrc->irq_lock); urccmd = le_readl(whcrc->rc_base + URCCMD) & ~URCCMD_ACTIVE; le_writel(urccmd | URCCMD_EARV, whcrc->rc_base + URCCMD); spin_unlock(&whcrc->irq_lock); - - d_fnend(4, dev, "(whcrc %p) = void\n", whcrc); } static void whcrc_event_work(struct work_struct *work) { struct whcrc *whcrc = container_of(work, struct whcrc, event_work); - struct device *dev = &whcrc->umc_dev->dev; size_t size; u64 urcevtaddr; urcevtaddr = le_readq(whcrc->rc_base + URCEVTADDR); size = urcevtaddr & URCEVTADDR_OFFSET_MASK; - d_printf(3, dev, "received %zu octet event\n", size); - d_dump(4, dev, whcrc->evt_buf, size > 32 ? 32 : size); - uwb_rc_neh_grok(whcrc->uwb_rc, whcrc->evt_buf, size); whcrc_enable_events(whcrc); } @@ -217,22 +195,15 @@ irqreturn_t whcrc_irq_cb(int irq, void *_whcrc) return IRQ_NONE; le_writel(urcsts & URCSTS_INT_MASK, whcrc->rc_base + URCSTS); - d_printf(4, dev, "acked 0x%08x, urcsts 0x%08x\n", - le_readl(whcrc->rc_base + URCSTS), urcsts); - if (urcsts & URCSTS_HSE) { dev_err(dev, "host system error -- hardware halted\n"); /* FIXME: do something sensible here */ goto out; } - if (urcsts & URCSTS_ER) { - d_printf(3, dev, "ER: event ready\n"); + if (urcsts & URCSTS_ER) schedule_work(&whcrc->event_work); - } - if (urcsts & URCSTS_RCI) { - d_printf(3, dev, "RCI: ready to execute another command\n"); + if (urcsts & URCSTS_RCI) wake_up_all(&whcrc->cmd_wq); - } out: return IRQ_HANDLED; } @@ -251,8 +222,7 @@ int whcrc_setup_rc_umc(struct whcrc *whcrc) whcrc->area = umc_dev->resource.start; whcrc->rc_len = umc_dev->resource.end - umc_dev->resource.start + 1; result = -EBUSY; - if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) - == NULL) { + if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) == NULL) { dev_err(dev, "can't request URC region (%zu bytes @ 0x%lx): %d\n", whcrc->rc_len, whcrc->area, result); goto error_request_region; @@ -287,8 +257,6 @@ int whcrc_setup_rc_umc(struct whcrc *whcrc) dev_err(dev, "Can't allocate evt transfer buffer\n"); goto error_evt_buffer; } - d_printf(3, dev, "UWB RC Interface: %zu bytes at 0x%p, irq %u\n", - whcrc->rc_len, whcrc->rc_base, umc_dev->irq); return 0; error_evt_buffer: @@ -333,47 +301,23 @@ void whcrc_release_rc_umc(struct whcrc *whcrc) static int whcrc_start_rc(struct uwb_rc *rc) { struct whcrc *whcrc = rc->priv; - int result = 0; struct device *dev = &whcrc->umc_dev->dev; - unsigned long start, duration; /* Reset the thing */ le_writel(URCCMD_RESET, whcrc->rc_base + URCCMD); - if (d_test(3)) - start = jiffies; if (whci_wait_for(dev, whcrc->rc_base + URCCMD, URCCMD_RESET, 0, - 5000, "device to reset at init") < 0) { - result = -EBUSY; - goto error; - } else if (d_test(3)) { - duration = jiffies - start; - if (duration > msecs_to_jiffies(40)) - dev_err(dev, "Device took %ums to " - "reset. MAX expected: 40ms\n", - jiffies_to_msecs(duration)); - } + 5000, "hardware reset") < 0) + return -EBUSY; /* Set the event buffer, start the controller (enable IRQs later) */ le_writel(0, whcrc->rc_base + URCINTR); le_writel(URCCMD_RS, whcrc->rc_base + URCCMD); - result = -ETIMEDOUT; - if (d_test(3)) - start = jiffies; if (whci_wait_for(dev, whcrc->rc_base + URCSTS, URCSTS_HALTED, 0, - 5000, "device to start") < 0) - goto error; - if (d_test(3)) { - duration = jiffies - start; - if (duration > msecs_to_jiffies(40)) - dev_err(dev, "Device took %ums to start. " - "MAX expected: 40ms\n", - jiffies_to_msecs(duration)); - } + 5000, "radio controller start") < 0) + return -ETIMEDOUT; whcrc_enable_events(whcrc); - result = 0; le_writel(URCINTR_EN_ALL, whcrc->rc_base + URCINTR); -error: - return result; + return 0; } @@ -395,7 +339,7 @@ void whcrc_stop_rc(struct uwb_rc *rc) le_writel(0, whcrc->rc_base + URCCMD); whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS, - URCSTS_HALTED, 0, 40, "URCSTS.HALTED"); + URCSTS_HALTED, URCSTS_HALTED, 100, "radio controller stop"); } static void whcrc_init(struct whcrc *whcrc) @@ -421,7 +365,6 @@ int whcrc_probe(struct umc_dev *umc_dev) struct whcrc *whcrc; struct device *dev = &umc_dev->dev; - d_fnstart(3, dev, "(umc_dev %p)\n", umc_dev); result = -ENOMEM; uwb_rc = uwb_rc_alloc(); if (uwb_rc == NULL) { @@ -453,7 +396,6 @@ int whcrc_probe(struct umc_dev *umc_dev) if (result < 0) goto error_rc_add; umc_set_drvdata(umc_dev, whcrc); - d_fnend(3, dev, "(umc_dev %p) = 0\n", umc_dev); return 0; error_rc_add: @@ -463,7 +405,6 @@ error_setup_rc_umc: error_alloc: uwb_rc_put(uwb_rc); error_rc_alloc: - d_fnend(3, dev, "(umc_dev %p) = %d\n", umc_dev, result); return result; } @@ -486,7 +427,24 @@ static void whcrc_remove(struct umc_dev *umc_dev) whcrc_release_rc_umc(whcrc); kfree(whcrc); uwb_rc_put(uwb_rc); - d_printf(1, &umc_dev->dev, "freed whcrc %p\n", whcrc); +} + +static int whcrc_pre_reset(struct umc_dev *umc) +{ + struct whcrc *whcrc = umc_get_drvdata(umc); + struct uwb_rc *uwb_rc = whcrc->uwb_rc; + + uwb_rc_pre_reset(uwb_rc); + return 0; +} + +static int whcrc_post_reset(struct umc_dev *umc) +{ + struct whcrc *whcrc = umc_get_drvdata(umc); + struct uwb_rc *uwb_rc = whcrc->uwb_rc; + + uwb_rc_post_reset(uwb_rc); + return 0; } /* PCI device ID's that we handle [so it gets loaded] */ @@ -497,10 +455,12 @@ static struct pci_device_id whcrc_id_table[] = { MODULE_DEVICE_TABLE(pci, whcrc_id_table); static struct umc_driver whcrc_driver = { - .name = "whc-rc", - .cap_id = UMC_CAP_ID_WHCI_RC, - .probe = whcrc_probe, - .remove = whcrc_remove, + .name = "whc-rc", + .cap_id = UMC_CAP_ID_WHCI_RC, + .probe = whcrc_probe, + .remove = whcrc_remove, + .pre_reset = whcrc_pre_reset, + .post_reset = whcrc_post_reset, }; static int __init whcrc_driver_init(void) diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c index 3df2388f908f..1f8964ed9882 100644 --- a/drivers/uwb/whci.c +++ b/drivers/uwb/whci.c @@ -67,11 +67,11 @@ int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result, val = le_readl(reg); if ((val & mask) == result) break; - msleep(10); if (t >= max_ms) { - dev_err(dev, "timed out waiting for %s ", tag); + dev_err(dev, "%s timed out\n", tag); return -ETIMEDOUT; } + msleep(10); t += 10; } return 0; @@ -111,7 +111,7 @@ static int whci_add_cap(struct whci_card *card, int n) + UWBCAPDATA_TO_OFFSET(capdata); umc->resource.end = umc->resource.start + (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1; - umc->resource.name = umc->dev.bus_id; + umc->resource.name = dev_name(&umc->dev); umc->resource.flags = card->pci->resource[bar].flags; umc->resource.parent = &card->pci->resource[bar]; umc->irq = card->pci->irq; diff --git a/drivers/uwb/wlp/eda.c b/drivers/uwb/wlp/eda.c index 10985fa233cc..69e020039718 100644 --- a/drivers/uwb/wlp/eda.c +++ b/drivers/uwb/wlp/eda.c @@ -51,9 +51,7 @@ * the tag and address of the transmitting neighbor. */ -#define D_LOCAL 5 #include <linux/netdevice.h> -#include <linux/uwb/debug.h> #include <linux/etherdevice.h> #include <linux/wlp.h> #include "wlp-internal.h" @@ -304,7 +302,6 @@ int wlp_eda_for_virtual(struct wlp_eda *eda, { int result = 0; struct wlp *wlp = container_of(eda, struct wlp, eda); - struct device *dev = &wlp->rc->uwb_dev.dev; struct wlp_eda_node *itr; unsigned long flags; int found = 0; @@ -313,26 +310,14 @@ int wlp_eda_for_virtual(struct wlp_eda *eda, list_for_each_entry(itr, &eda->cache, list_node) { if (!memcmp(itr->virt_addr, virt_addr, sizeof(itr->virt_addr))) { - d_printf(6, dev, "EDA: looking for %pM hit %02x:%02x " - "wss %p tag 0x%02x state %u\n", - virt_addr, - itr->dev_addr.data[1], - itr->dev_addr.data[0], itr->wss, - itr->tag, itr->state); result = (*function)(wlp, itr, priv); *dev_addr = itr->dev_addr; found = 1; break; - } else - d_printf(6, dev, "EDA: looking for %pM against %pM miss\n", - virt_addr, itr->virt_addr); + } } - if (!found) { - if (printk_ratelimit()) - dev_err(dev, "EDA: Eth addr %pM not found.\n", - virt_addr); + if (!found) result = -ENODEV; - } spin_unlock_irqrestore(&eda->lock, flags); return result; } diff --git a/drivers/uwb/wlp/messages.c b/drivers/uwb/wlp/messages.c index a64cb8241713..aa42fcee4c4f 100644 --- a/drivers/uwb/wlp/messages.c +++ b/drivers/uwb/wlp/messages.c @@ -24,8 +24,7 @@ */ #include <linux/wlp.h> -#define D_LOCAL 6 -#include <linux/uwb/debug.h> + #include "wlp-internal.h" static @@ -105,24 +104,18 @@ static inline void wlp_set_attr_hdr(struct wlp_attr_hdr *hdr, unsigned type, #define wlp_set(type, type_code, name) \ static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value) \ { \ - d_fnstart(6, NULL, "(attribute %p)\n", attr); \ wlp_set_attr_hdr(&attr->hdr, type_code, \ sizeof(*attr) - sizeof(struct wlp_attr_hdr)); \ attr->name = value; \ - d_dump(6, NULL, attr, sizeof(*attr)); \ - d_fnend(6, NULL, "(attribute %p)\n", attr); \ return sizeof(*attr); \ } #define wlp_pset(type, type_code, name) \ static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value) \ { \ - d_fnstart(6, NULL, "(attribute %p)\n", attr); \ wlp_set_attr_hdr(&attr->hdr, type_code, \ sizeof(*attr) - sizeof(struct wlp_attr_hdr)); \ attr->name = *value; \ - d_dump(6, NULL, attr, sizeof(*attr)); \ - d_fnend(6, NULL, "(attribute %p)\n", attr); \ return sizeof(*attr); \ } @@ -139,11 +132,8 @@ static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value) \ static size_t wlp_set_##name(struct wlp_attr_##name *attr, type value, \ size_t len) \ { \ - d_fnstart(6, NULL, "(attribute %p)\n", attr); \ wlp_set_attr_hdr(&attr->hdr, type_code, len); \ memcpy(attr->name, value, len); \ - d_dump(6, NULL, attr, sizeof(*attr) + len); \ - d_fnend(6, NULL, "(attribute %p)\n", attr); \ return sizeof(*attr) + len; \ } @@ -182,7 +172,7 @@ static size_t wlp_set_wss_info(struct wlp_attr_wss_info *attr, size_t datalen; void *ptr = attr->wss_info; size_t used = sizeof(*attr); - d_fnstart(6, NULL, "(attribute %p)\n", attr); + datalen = sizeof(struct wlp_wss_info) + strlen(wss->name); wlp_set_attr_hdr(&attr->hdr, WLP_ATTR_WSS_INFO, datalen); used = wlp_set_wssid(ptr, &wss->wssid); @@ -190,9 +180,6 @@ static size_t wlp_set_wss_info(struct wlp_attr_wss_info *attr, used += wlp_set_accept_enrl(ptr + used, wss->accept_enroll); used += wlp_set_wss_sec_status(ptr + used, wss->secure_status); used += wlp_set_wss_bcast(ptr + used, &wss->bcast); - d_dump(6, NULL, attr, sizeof(*attr) + datalen); - d_fnend(6, NULL, "(attribute %p, used %d)\n", - attr, (int)(sizeof(*attr) + used)); return sizeof(*attr) + used; } @@ -414,7 +401,6 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp, size_t used = 0; ssize_t result = -EINVAL; - d_printf(6, dev, "WLP: WSS info: Retrieving WSS name\n"); result = wlp_get_wss_name(wlp, ptr, info->name, buflen); if (result < 0) { dev_err(dev, "WLP: unable to obtain WSS name from " @@ -422,7 +408,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp, goto error_parse; } used += result; - d_printf(6, dev, "WLP: WSS info: Retrieving accept enroll\n"); + result = wlp_get_accept_enrl(wlp, ptr + used, &info->accept_enroll, buflen - used); if (result < 0) { @@ -437,7 +423,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp, goto error_parse; } used += result; - d_printf(6, dev, "WLP: WSS info: Retrieving secure status\n"); + result = wlp_get_wss_sec_status(wlp, ptr + used, &info->sec_status, buflen - used); if (result < 0) { @@ -452,7 +438,7 @@ static ssize_t wlp_get_wss_info_attrs(struct wlp *wlp, goto error_parse; } used += result; - d_printf(6, dev, "WLP: WSS info: Retrieving broadcast\n"); + result = wlp_get_wss_bcast(wlp, ptr + used, &info->bcast, buflen - used); if (result < 0) { @@ -530,7 +516,7 @@ static ssize_t wlp_get_wss_info(struct wlp *wlp, struct wlp_attr_wss_info *attr, len = result; used = sizeof(*attr); ptr = attr; - d_printf(6, dev, "WLP: WSS info: Retrieving WSSID\n"); + result = wlp_get_wssid(wlp, ptr + used, wssid, buflen - used); if (result < 0) { dev_err(dev, "WLP: unable to obtain WSSID from WSS info.\n"); @@ -553,8 +539,6 @@ static ssize_t wlp_get_wss_info(struct wlp *wlp, struct wlp_attr_wss_info *attr, goto out; } result = used; - d_printf(6, dev, "WLP: Successfully parsed WLP information " - "attribute. used %zu bytes\n", used); out: return result; } @@ -598,8 +582,6 @@ static ssize_t wlp_get_all_wss_info(struct wlp *wlp, struct wlp_wssid_e *wssid_e; char buf[WLP_WSS_UUID_STRSIZE]; - d_fnstart(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d \n", - wlp, attr, neighbor, wss, (int)buflen); if (buflen < 0) goto out; @@ -638,8 +620,7 @@ static ssize_t wlp_get_all_wss_info(struct wlp *wlp, wss->accept_enroll = wss_info.accept_enroll; wss->state = WLP_WSS_STATE_PART_ENROLLED; wlp_wss_uuid_print(buf, sizeof(buf), &wssid); - d_printf(2, dev, "WLP: Found WSS %s. Enrolling.\n", - buf); + dev_dbg(dev, "WLP: Found WSS %s. Enrolling.\n", buf); } else { wssid_e = wlp_create_wssid_e(wlp, neighbor); if (wssid_e == NULL) { @@ -660,9 +641,6 @@ error_parse: if (result < 0 && !enroll) /* this was a discovery */ wlp_remove_neighbor_tmp_info(neighbor); out: - d_fnend(6, dev, "wlp %p, attr %p, neighbor %p, wss %p, buflen %d, " - "result %d \n", wlp, attr, neighbor, wss, (int)buflen, - (int)result); return result; } @@ -718,7 +696,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss, struct sk_buff *_skb; void *d1_itr; - d_fnstart(6, dev, "wlp %p\n", wlp); if (wlp->dev_info == NULL) { result = __wlp_setup_device_info(wlp); if (result < 0) { @@ -728,24 +705,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss, } } info = wlp->dev_info; - d_printf(6, dev, "Local properties:\n" - "Device name (%d bytes): %s\n" - "Model name (%d bytes): %s\n" - "Manufacturer (%d bytes): %s\n" - "Model number (%d bytes): %s\n" - "Serial number (%d bytes): %s\n" - "Primary device type: \n" - " Category: %d \n" - " OUI: %02x:%02x:%02x \n" - " OUI Subdivision: %u \n", - (int)strlen(info->name), info->name, - (int)strlen(info->model_name), info->model_name, - (int)strlen(info->manufacturer), info->manufacturer, - (int)strlen(info->model_nr), info->model_nr, - (int)strlen(info->serial), info->serial, - info->prim_dev_type.category, - info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1], - info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv); _skb = dev_alloc_skb(sizeof(*_d1) + sizeof(struct wlp_attr_uuid_e) + sizeof(struct wlp_attr_wss_sel_mthd) @@ -768,7 +727,6 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss, goto error; } _d1 = (void *) _skb->data; - d_printf(6, dev, "D1 starts at %p \n", _d1); _d1->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID); _d1->hdr.type = WLP_FRAME_ASSOCIATION; _d1->type = WLP_ASSOC_D1; @@ -791,25 +749,8 @@ static int wlp_build_assoc_d1(struct wlp *wlp, struct wlp_wss *wss, used += wlp_set_prim_dev_type(d1_itr + used, &info->prim_dev_type); used += wlp_set_wlp_assc_err(d1_itr + used, WLP_ASSOC_ERROR_NONE); skb_put(_skb, sizeof(*_d1) + used); - d_printf(6, dev, "D1 message:\n"); - d_dump(6, dev, _d1, sizeof(*_d1) - + sizeof(struct wlp_attr_uuid_e) - + sizeof(struct wlp_attr_wss_sel_mthd) - + sizeof(struct wlp_attr_dev_name) - + strlen(info->name) - + sizeof(struct wlp_attr_manufacturer) - + strlen(info->manufacturer) - + sizeof(struct wlp_attr_model_name) - + strlen(info->model_name) - + sizeof(struct wlp_attr_model_nr) - + strlen(info->model_nr) - + sizeof(struct wlp_attr_serial) - + strlen(info->serial) - + sizeof(struct wlp_attr_prim_dev_type) - + sizeof(struct wlp_attr_wlp_assc_err)); *skb = _skb; error: - d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result); return result; } @@ -837,7 +778,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss, void *d2_itr; size_t mem_needed; - d_fnstart(6, dev, "wlp %p\n", wlp); if (wlp->dev_info == NULL) { result = __wlp_setup_device_info(wlp); if (result < 0) { @@ -847,24 +787,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss, } } info = wlp->dev_info; - d_printf(6, dev, "Local properties:\n" - "Device name (%d bytes): %s\n" - "Model name (%d bytes): %s\n" - "Manufacturer (%d bytes): %s\n" - "Model number (%d bytes): %s\n" - "Serial number (%d bytes): %s\n" - "Primary device type: \n" - " Category: %d \n" - " OUI: %02x:%02x:%02x \n" - " OUI Subdivision: %u \n", - (int)strlen(info->name), info->name, - (int)strlen(info->model_name), info->model_name, - (int)strlen(info->manufacturer), info->manufacturer, - (int)strlen(info->model_nr), info->model_nr, - (int)strlen(info->serial), info->serial, - info->prim_dev_type.category, - info->prim_dev_type.OUI[0], info->prim_dev_type.OUI[1], - info->prim_dev_type.OUI[2], info->prim_dev_type.OUIsubdiv); mem_needed = sizeof(*_d2) + sizeof(struct wlp_attr_uuid_e) + sizeof(struct wlp_attr_uuid_r) @@ -892,7 +814,6 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss, goto error; } _d2 = (void *) _skb->data; - d_printf(6, dev, "D2 starts at %p \n", _d2); _d2->hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID); _d2->hdr.type = WLP_FRAME_ASSOCIATION; _d2->type = WLP_ASSOC_D2; @@ -917,11 +838,8 @@ int wlp_build_assoc_d2(struct wlp *wlp, struct wlp_wss *wss, used += wlp_set_prim_dev_type(d2_itr + used, &info->prim_dev_type); used += wlp_set_wlp_assc_err(d2_itr + used, WLP_ASSOC_ERROR_NONE); skb_put(_skb, sizeof(*_d2) + used); - d_printf(6, dev, "D2 message:\n"); - d_dump(6, dev, _d2, mem_needed); *skb = _skb; error: - d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result); return result; } @@ -947,7 +865,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb, struct sk_buff *_skb; struct wlp_nonce tmp; - d_fnstart(6, dev, "wlp %p\n", wlp); _skb = dev_alloc_skb(sizeof(*f0)); if (_skb == NULL) { dev_err(dev, "WLP: Unable to allocate memory for F0 " @@ -955,7 +872,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb, goto error_alloc; } f0 = (void *) _skb->data; - d_printf(6, dev, "F0 starts at %p \n", f0); f0->f0_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID); f0->f0_hdr.hdr.type = WLP_FRAME_ASSOCIATION; f0->f0_hdr.type = WLP_ASSOC_F0; @@ -969,7 +885,6 @@ int wlp_build_assoc_f0(struct wlp *wlp, struct sk_buff **skb, *skb = _skb; result = 0; error_alloc: - d_fnend(6, dev, "wlp %p, result %d \n", wlp, result); return result; } @@ -1242,12 +1157,9 @@ void wlp_handle_d1_frame(struct work_struct *ws) enum wlp_wss_sel_mthd sel_mthd = 0; struct wlp_device_info dev_info; enum wlp_assc_error assc_err; - char uuid[WLP_WSS_UUID_STRSIZE]; struct sk_buff *resp = NULL; /* Parse D1 frame */ - d_fnstart(6, dev, "WLP: handle D1 frame. wlp = %p, skb = %p\n", - wlp, skb); mutex_lock(&wss->mutex); mutex_lock(&wlp->mutex); /* to access wlp->uuid */ memset(&dev_info, 0, sizeof(dev_info)); @@ -1258,30 +1170,6 @@ void wlp_handle_d1_frame(struct work_struct *ws) kfree_skb(skb); goto out; } - wlp_wss_uuid_print(uuid, sizeof(uuid), &uuid_e); - d_printf(6, dev, "From D1 frame:\n" - "UUID-E: %s\n" - "Selection method: %d\n" - "Device name (%d bytes): %s\n" - "Model name (%d bytes): %s\n" - "Manufacturer (%d bytes): %s\n" - "Model number (%d bytes): %s\n" - "Serial number (%d bytes): %s\n" - "Primary device type: \n" - " Category: %d \n" - " OUI: %02x:%02x:%02x \n" - " OUI Subdivision: %u \n", - uuid, sel_mthd, - (int)strlen(dev_info.name), dev_info.name, - (int)strlen(dev_info.model_name), dev_info.model_name, - (int)strlen(dev_info.manufacturer), dev_info.manufacturer, - (int)strlen(dev_info.model_nr), dev_info.model_nr, - (int)strlen(dev_info.serial), dev_info.serial, - dev_info.prim_dev_type.category, - dev_info.prim_dev_type.OUI[0], - dev_info.prim_dev_type.OUI[1], - dev_info.prim_dev_type.OUI[2], - dev_info.prim_dev_type.OUIsubdiv); kfree_skb(skb); if (!wlp_uuid_is_set(&wlp->uuid)) { @@ -1316,7 +1204,6 @@ out: kfree(frame_ctx); mutex_unlock(&wlp->mutex); mutex_unlock(&wss->mutex); - d_fnend(6, dev, "WLP: handle D1 frame. wlp = %p\n", wlp); } /** @@ -1546,10 +1433,8 @@ int wlp_parse_c3c4_frame(struct wlp *wlp, struct sk_buff *skb, void *ptr = skb->data; size_t len = skb->len; size_t used; - char buf[WLP_WSS_UUID_STRSIZE]; struct wlp_frame_assoc *assoc = ptr; - d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb); used = sizeof(*assoc); result = wlp_get_wssid(wlp, ptr + used, wssid, len - used); if (result < 0) { @@ -1572,14 +1457,7 @@ int wlp_parse_c3c4_frame(struct wlp *wlp, struct sk_buff *skb, wlp_assoc_frame_str(assoc->type)); goto error_parse; } - wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_printf(6, dev, "WLP: parsed: WSSID %s, tag 0x%02x, virt " - "%02x:%02x:%02x:%02x:%02x:%02x \n", buf, *tag, - virt_addr->data[0], virt_addr->data[1], virt_addr->data[2], - virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]); - error_parse: - d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result); return result; } @@ -1600,7 +1478,6 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss, } *c; struct sk_buff *_skb; - d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss); _skb = dev_alloc_skb(sizeof(*c)); if (_skb == NULL) { dev_err(dev, "WLP: Unable to allocate memory for C1/C2 " @@ -1608,7 +1485,6 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss, goto error_alloc; } c = (void *) _skb->data; - d_printf(6, dev, "C1/C2 starts at %p \n", c); c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID); c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION; c->c_hdr.type = type; @@ -1616,12 +1492,9 @@ int wlp_build_assoc_c1c2(struct wlp *wlp, struct wlp_wss *wss, wlp_set_msg_type(&c->c_hdr.msg_type, type); wlp_set_wssid(&c->wssid, &wss->wssid); skb_put(_skb, sizeof(*c)); - d_printf(6, dev, "C1/C2 message:\n"); - d_dump(6, dev, c, sizeof(*c)); *skb = _skb; result = 0; error_alloc: - d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result); return result; } @@ -1660,7 +1533,6 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss, } *c; struct sk_buff *_skb; - d_fnstart(6, dev, "wlp %p, wss %p \n", wlp, wss); _skb = dev_alloc_skb(sizeof(*c)); if (_skb == NULL) { dev_err(dev, "WLP: Unable to allocate memory for C3/C4 " @@ -1668,7 +1540,6 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss, goto error_alloc; } c = (void *) _skb->data; - d_printf(6, dev, "C3/C4 starts at %p \n", c); c->c_hdr.hdr.mux_hdr = cpu_to_le16(WLP_PROTOCOL_ID); c->c_hdr.hdr.type = WLP_FRAME_ASSOCIATION; c->c_hdr.type = type; @@ -1678,12 +1549,9 @@ int wlp_build_assoc_c3c4(struct wlp *wlp, struct wlp_wss *wss, wlp_set_wss_tag(&c->wss_tag, wss->tag); wlp_set_wss_virt(&c->wss_virt, &wss->virtual_addr); skb_put(_skb, sizeof(*c)); - d_printf(6, dev, "C3/C4 message:\n"); - d_dump(6, dev, c, sizeof(*c)); *skb = _skb; result = 0; error_alloc: - d_fnend(6, dev, "wlp %p, wss %p, result %d \n", wlp, wss, result); return result; } @@ -1709,10 +1577,7 @@ static int wlp_send_assoc_##type(struct wlp *wlp, struct wlp_wss *wss, \ struct device *dev = &wlp->rc->uwb_dev.dev; \ int result; \ struct sk_buff *skb = NULL; \ - d_fnstart(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n", \ - wlp, wss, dev_addr->data[1], dev_addr->data[0]); \ - d_printf(6, dev, "WLP: Constructing %s frame. \n", \ - wlp_assoc_frame_str(id)); \ + \ /* Build the frame */ \ result = wlp_build_assoc_##type(wlp, wss, &skb); \ if (result < 0) { \ @@ -1721,9 +1586,6 @@ static int wlp_send_assoc_##type(struct wlp *wlp, struct wlp_wss *wss, \ goto error_build_assoc; \ } \ /* Send the frame */ \ - d_printf(6, dev, "Transmitting %s frame to %02x:%02x \n", \ - wlp_assoc_frame_str(id), \ - dev_addr->data[1], dev_addr->data[0]); \ BUG_ON(wlp->xmit_frame == NULL); \ result = wlp->xmit_frame(wlp, skb, dev_addr); \ if (result < 0) { \ @@ -1740,8 +1602,6 @@ error_xmit: \ /* We could try again ... */ \ dev_kfree_skb_any(skb);/*we need to free if tx fails*/ \ error_build_assoc: \ - d_fnend(6, dev, "wlp %p, wss %p, neighbor: %02x:%02x\n", \ - wlp, wss, dev_addr->data[1], dev_addr->data[0]); \ return result; \ } @@ -1794,12 +1654,9 @@ void wlp_handle_c1_frame(struct work_struct *ws) struct uwb_dev_addr *src = &frame_ctx->src; int result; struct wlp_uuid wssid; - char buf[WLP_WSS_UUID_STRSIZE]; struct sk_buff *resp = NULL; /* Parse C1 frame */ - d_fnstart(6, dev, "WLP: handle C1 frame. wlp = %p, c1 = %p\n", - wlp, c1); mutex_lock(&wss->mutex); result = wlp_get_wssid(wlp, (void *)c1 + sizeof(*c1), &wssid, len - sizeof(*c1)); @@ -1807,12 +1664,8 @@ void wlp_handle_c1_frame(struct work_struct *ws) dev_err(dev, "WLP: unable to obtain WSSID from C1 frame.\n"); goto out; } - wlp_wss_uuid_print(buf, sizeof(buf), &wssid); - d_printf(6, dev, "Received C1 frame with WSSID %s \n", buf); if (!memcmp(&wssid, &wss->wssid, sizeof(wssid)) && wss->state == WLP_WSS_STATE_ACTIVE) { - d_printf(6, dev, "WSSID from C1 frame is known locally " - "and is active\n"); /* Construct C2 frame */ result = wlp_build_assoc_c2(wlp, wss, &resp); if (result < 0) { @@ -1820,8 +1673,6 @@ void wlp_handle_c1_frame(struct work_struct *ws) goto out; } } else { - d_printf(6, dev, "WSSID from C1 frame is not known locally " - "or is not active\n"); /* Construct F0 frame */ result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV); if (result < 0) { @@ -1830,8 +1681,6 @@ void wlp_handle_c1_frame(struct work_struct *ws) } } /* Send C2 frame */ - d_printf(6, dev, "Transmitting response (C2/F0) frame to %02x:%02x \n", - src->data[1], src->data[0]); BUG_ON(wlp->xmit_frame == NULL); result = wlp->xmit_frame(wlp, resp, src); if (result < 0) { @@ -1846,7 +1695,6 @@ out: kfree_skb(frame_ctx->skb); kfree(frame_ctx); mutex_unlock(&wss->mutex); - d_fnend(6, dev, "WLP: handle C1 frame. wlp = %p\n", wlp); } /** @@ -1868,27 +1716,20 @@ void wlp_handle_c3_frame(struct work_struct *ws) struct sk_buff *skb = frame_ctx->skb; struct uwb_dev_addr *src = &frame_ctx->src; int result; - char buf[WLP_WSS_UUID_STRSIZE]; struct sk_buff *resp = NULL; struct wlp_uuid wssid; u8 tag; struct uwb_mac_addr virt_addr; /* Parse C3 frame */ - d_fnstart(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n", - wlp, skb); mutex_lock(&wss->mutex); result = wlp_parse_c3c4_frame(wlp, skb, &wssid, &tag, &virt_addr); if (result < 0) { dev_err(dev, "WLP: unable to obtain values from C3 frame.\n"); goto out; } - wlp_wss_uuid_print(buf, sizeof(buf), &wssid); - d_printf(6, dev, "Received C3 frame with WSSID %s \n", buf); if (!memcmp(&wssid, &wss->wssid, sizeof(wssid)) && wss->state >= WLP_WSS_STATE_ACTIVE) { - d_printf(6, dev, "WSSID from C3 frame is known locally " - "and is active\n"); result = wlp_eda_update_node(&wlp->eda, src, wss, (void *) virt_addr.data, tag, WLP_WSS_CONNECTED); @@ -1913,8 +1754,6 @@ void wlp_handle_c3_frame(struct work_struct *ws) } } } else { - d_printf(6, dev, "WSSID from C3 frame is not known locally " - "or is not active\n"); /* Construct F0 frame */ result = wlp_build_assoc_f0(wlp, &resp, WLP_ASSOC_ERROR_INV); if (result < 0) { @@ -1923,8 +1762,6 @@ void wlp_handle_c3_frame(struct work_struct *ws) } } /* Send C4 frame */ - d_printf(6, dev, "Transmitting response (C4/F0) frame to %02x:%02x \n", - src->data[1], src->data[0]); BUG_ON(wlp->xmit_frame == NULL); result = wlp->xmit_frame(wlp, resp, src); if (result < 0) { @@ -1939,8 +1776,6 @@ out: kfree_skb(frame_ctx->skb); kfree(frame_ctx); mutex_unlock(&wss->mutex); - d_fnend(6, dev, "WLP: handle C3 frame. wlp = %p, skb = %p\n", - wlp, skb); } diff --git a/drivers/uwb/wlp/sysfs.c b/drivers/uwb/wlp/sysfs.c index 1bb9b1f97d47..0370399ff4bb 100644 --- a/drivers/uwb/wlp/sysfs.c +++ b/drivers/uwb/wlp/sysfs.c @@ -23,8 +23,8 @@ * FIXME: Docs * */ - #include <linux/wlp.h> + #include "wlp-internal.h" static diff --git a/drivers/uwb/wlp/txrx.c b/drivers/uwb/wlp/txrx.c index c701bd1a2887..cd2035768b47 100644 --- a/drivers/uwb/wlp/txrx.c +++ b/drivers/uwb/wlp/txrx.c @@ -26,12 +26,10 @@ #include <linux/etherdevice.h> #include <linux/wlp.h> -#define D_LOCAL 5 -#include <linux/uwb/debug.h> -#include "wlp-internal.h" +#include "wlp-internal.h" -/** +/* * Direct incoming association msg to correct parsing routine * * We only expect D1, E1, C1, C3 messages as new. All other incoming @@ -48,35 +46,31 @@ void wlp_direct_assoc_frame(struct wlp *wlp, struct sk_buff *skb, struct device *dev = &wlp->rc->uwb_dev.dev; struct wlp_frame_assoc *assoc = (void *) skb->data; struct wlp_assoc_frame_ctx *frame_ctx; - d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb); + frame_ctx = kmalloc(sizeof(*frame_ctx), GFP_ATOMIC); if (frame_ctx == NULL) { dev_err(dev, "WLP: Unable to allocate memory for association " "frame handling.\n"); kfree_skb(skb); - goto out; + return; } frame_ctx->wlp = wlp; frame_ctx->skb = skb; frame_ctx->src = *src; switch (assoc->type) { case WLP_ASSOC_D1: - d_printf(5, dev, "Received a D1 frame.\n"); INIT_WORK(&frame_ctx->ws, wlp_handle_d1_frame); schedule_work(&frame_ctx->ws); break; case WLP_ASSOC_E1: - d_printf(5, dev, "Received a E1 frame. FIXME?\n"); kfree_skb(skb); /* Temporary until we handle it */ kfree(frame_ctx); /* Temporary until we handle it */ break; case WLP_ASSOC_C1: - d_printf(5, dev, "Received a C1 frame.\n"); INIT_WORK(&frame_ctx->ws, wlp_handle_c1_frame); schedule_work(&frame_ctx->ws); break; case WLP_ASSOC_C3: - d_printf(5, dev, "Received a C3 frame.\n"); INIT_WORK(&frame_ctx->ws, wlp_handle_c3_frame); schedule_work(&frame_ctx->ws); break; @@ -87,11 +81,9 @@ void wlp_direct_assoc_frame(struct wlp *wlp, struct sk_buff *skb, kfree(frame_ctx); break; } -out: - d_fnend(5, dev, "wlp %p\n", wlp); } -/** +/* * Process incoming association frame * * Although it could be possible to deal with some incoming association @@ -112,7 +104,6 @@ void wlp_receive_assoc_frame(struct wlp *wlp, struct sk_buff *skb, struct wlp_frame_assoc *assoc = (void *) skb->data; struct wlp_session *session = wlp->session; u8 version; - d_fnstart(5, dev, "wlp %p, skb %p\n", wlp, skb); if (wlp_get_version(wlp, &assoc->version, &version, sizeof(assoc->version)) < 0) @@ -150,14 +141,12 @@ void wlp_receive_assoc_frame(struct wlp *wlp, struct sk_buff *skb, } else { wlp_direct_assoc_frame(wlp, skb, src); } - d_fnend(5, dev, "wlp %p\n", wlp); return; error: kfree_skb(skb); - d_fnend(5, dev, "wlp %p\n", wlp); } -/** +/* * Verify incoming frame is from connected neighbor, prep to pass to WLP client * * Verification proceeds according to WLP 0.99 [7.3.1]. The source address @@ -176,7 +165,6 @@ int wlp_verify_prep_rx_frame(struct wlp *wlp, struct sk_buff *skb, struct wlp_eda_node eda_entry; struct wlp_frame_std_abbrv_hdr *hdr = (void *) skb->data; - d_fnstart(6, dev, "wlp %p, skb %p \n", wlp, skb); /*verify*/ result = wlp_copy_eda_node(&wlp->eda, src, &eda_entry); if (result < 0) { @@ -207,11 +195,10 @@ int wlp_verify_prep_rx_frame(struct wlp *wlp, struct sk_buff *skb, /*prep*/ skb_pull(skb, sizeof(*hdr)); out: - d_fnend(6, dev, "wlp %p, skb %p, result = %d \n", wlp, skb, result); return result; } -/** +/* * Receive a WLP frame from device * * @returns: 1 if calling function should free the skb @@ -226,14 +213,12 @@ int wlp_receive_frame(struct device *dev, struct wlp *wlp, struct sk_buff *skb, struct wlp_frame_hdr *hdr; int result = 0; - d_fnstart(6, dev, "skb (%p), len (%u)\n", skb, len); if (len < sizeof(*hdr)) { dev_err(dev, "Not enough data to parse WLP header.\n"); result = -EINVAL; goto out; } hdr = ptr; - d_dump(6, dev, hdr, sizeof(*hdr)); if (le16_to_cpu(hdr->mux_hdr) != WLP_PROTOCOL_ID) { dev_err(dev, "Not a WLP frame type.\n"); result = -EINVAL; @@ -270,7 +255,6 @@ int wlp_receive_frame(struct device *dev, struct wlp *wlp, struct sk_buff *skb, "WLP header.\n"); goto out; } - d_printf(5, dev, "Association frame received.\n"); wlp_receive_assoc_frame(wlp, skb, src); break; default: @@ -283,13 +267,12 @@ out: kfree_skb(skb); result = 0; } - d_fnend(6, dev, "skb (%p)\n", skb); return result; } EXPORT_SYMBOL_GPL(wlp_receive_frame); -/** +/* * Verify frame from network stack, prepare for further transmission * * @skb: the socket buffer that needs to be prepared for transmission (it @@ -343,9 +326,7 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp, int result = -EINVAL; struct ethhdr *eth_hdr = (void *) skb->data; - d_fnstart(6, dev, "wlp (%p), skb (%p) \n", wlp, skb); if (is_broadcast_ether_addr(eth_hdr->h_dest)) { - d_printf(6, dev, "WLP: handling broadcast frame. \n"); result = wlp_eda_for_each(&wlp->eda, wlp_wss_send_copy, skb); if (result < 0) { if (printk_ratelimit()) @@ -357,7 +338,6 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp, result = 1; /* Frame will be transmitted by WLP. */ } else { - d_printf(6, dev, "WLP: handling unicast frame. \n"); result = wlp_eda_for_virtual(&wlp->eda, eth_hdr->h_dest, dst, wlp_wss_prep_hdr, skb); if (unlikely(result < 0)) { @@ -368,7 +348,6 @@ int wlp_prepare_tx_frame(struct device *dev, struct wlp *wlp, } } out: - d_fnend(6, dev, "wlp (%p), skb (%p). result = %d \n", wlp, skb, result); return result; } EXPORT_SYMBOL_GPL(wlp_prepare_tx_frame); diff --git a/drivers/uwb/wlp/wlp-internal.h b/drivers/uwb/wlp/wlp-internal.h index 1c94fabfb1a7..3e8d5de7c5b9 100644 --- a/drivers/uwb/wlp/wlp-internal.h +++ b/drivers/uwb/wlp/wlp-internal.h @@ -42,10 +42,6 @@ enum wlp_wss_connect { extern struct kobj_type wss_ktype; extern struct attribute_group wss_attr_group; -extern int uwb_rc_ie_add(struct uwb_rc *, const struct uwb_ie_hdr *, size_t); -extern int uwb_rc_ie_rm(struct uwb_rc *, enum uwb_ie); - - /* This should be changed to a dynamic array where entries are sorted * by eth_addr and search is done in a binary form * diff --git a/drivers/uwb/wlp/wlp-lc.c b/drivers/uwb/wlp/wlp-lc.c index 0799402e73fb..13db739c4e39 100644 --- a/drivers/uwb/wlp/wlp-lc.c +++ b/drivers/uwb/wlp/wlp-lc.c @@ -21,12 +21,9 @@ * * FIXME: docs */ - #include <linux/wlp.h> -#define D_LOCAL 6 -#include <linux/uwb/debug.h> -#include "wlp-internal.h" +#include "wlp-internal.h" static void wlp_neighbor_init(struct wlp_neighbor_e *neighbor) @@ -61,11 +58,6 @@ int __wlp_alloc_device_info(struct wlp *wlp) static void __wlp_fill_device_info(struct wlp *wlp) { - struct device *dev = &wlp->rc->uwb_dev.dev; - - BUG_ON(wlp->fill_device_info == NULL); - d_printf(6, dev, "Retrieving device information " - "from device driver.\n"); wlp->fill_device_info(wlp, wlp->dev_info); } @@ -127,7 +119,7 @@ void wlp_remove_neighbor_tmp_info(struct wlp_neighbor_e *neighbor) } } -/** +/* * Populate WLP neighborhood cache with neighbor information * * A new neighbor is found. If it is discoverable then we add it to the @@ -141,10 +133,7 @@ int wlp_add_neighbor(struct wlp *wlp, struct uwb_dev *dev) int discoverable; struct wlp_neighbor_e *neighbor; - d_fnstart(6, &dev->dev, "uwb %p \n", dev); - d_printf(6, &dev->dev, "Found neighbor device %02x:%02x \n", - dev->dev_addr.data[1], dev->dev_addr.data[0]); - /** + /* * FIXME: * Use contents of WLP IE found in beacon cache to determine if * neighbor is discoverable. @@ -167,7 +156,6 @@ int wlp_add_neighbor(struct wlp *wlp, struct uwb_dev *dev) list_add(&neighbor->node, &wlp->neighbors); } error_no_mem: - d_fnend(6, &dev->dev, "uwb %p, result = %d \n", dev, result); return result; } @@ -255,8 +243,6 @@ int wlp_d1d2_exchange(struct wlp *wlp, struct wlp_neighbor_e *neighbor, dev_err(dev, "Unable to send D1 frame to neighbor " "%02x:%02x (%d)\n", dev_addr->data[1], dev_addr->data[0], result); - d_printf(6, dev, "Add placeholders into buffer next to " - "neighbor information we have (dev address).\n"); goto out; } /* Create session, wait for response */ @@ -284,8 +270,6 @@ int wlp_d1d2_exchange(struct wlp *wlp, struct wlp_neighbor_e *neighbor, /* Parse message in session->data: it will be either D2 or F0 */ skb = session.data; resp = (void *) skb->data; - d_printf(6, dev, "Received response to D1 frame. \n"); - d_dump(6, dev, skb->data, skb->len > 72 ? 72 : skb->len); if (resp->type == WLP_ASSOC_F0) { result = wlp_parse_f0(wlp, skb); @@ -337,10 +321,9 @@ int wlp_enroll_neighbor(struct wlp *wlp, struct wlp_neighbor_e *neighbor, struct device *dev = &wlp->rc->uwb_dev.dev; char buf[WLP_WSS_UUID_STRSIZE]; struct uwb_dev_addr *dev_addr = &neighbor->uwb_dev->dev_addr; + wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_fnstart(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n", - wlp, neighbor, wss, wssid, buf); - d_printf(6, dev, "Complete me.\n"); + result = wlp_d1d2_exchange(wlp, neighbor, wss, wssid); if (result < 0) { dev_err(dev, "WLP: D1/D2 message exchange for enrollment " @@ -360,13 +343,10 @@ int wlp_enroll_neighbor(struct wlp *wlp, struct wlp_neighbor_e *neighbor, goto error; } else { wss->state = WLP_WSS_STATE_ENROLLED; - d_printf(2, dev, "WLP: Success Enrollment into unsecure WSS " - "%s using neighbor %02x:%02x. \n", buf, - dev_addr->data[1], dev_addr->data[0]); + dev_dbg(dev, "WLP: Success Enrollment into unsecure WSS " + "%s using neighbor %02x:%02x. \n", + buf, dev_addr->data[1], dev_addr->data[0]); } - - d_fnend(6, dev, "wlp %p, neighbor %p, wss %p, wssid %p (%s)\n", - wlp, neighbor, wss, wssid, buf); out: return result; error: @@ -449,7 +429,6 @@ ssize_t wlp_discover(struct wlp *wlp) int result = 0; struct device *dev = &wlp->rc->uwb_dev.dev; - d_fnstart(6, dev, "wlp %p \n", wlp); mutex_lock(&wlp->nbmutex); /* Clear current neighborhood cache. */ __wlp_neighbors_release(wlp); @@ -469,7 +448,6 @@ ssize_t wlp_discover(struct wlp *wlp) } error_dev_for_each: mutex_unlock(&wlp->nbmutex); - d_fnend(6, dev, "wlp %p \n", wlp); return result; } @@ -492,9 +470,6 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev, int result; switch (event) { case UWB_NOTIF_ONAIR: - d_printf(6, dev, "UWB device %02x:%02x is onair\n", - uwb_dev->dev_addr.data[1], - uwb_dev->dev_addr.data[0]); result = wlp_eda_create_node(&wlp->eda, uwb_dev->mac_addr.data, &uwb_dev->dev_addr); @@ -505,18 +480,11 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev, uwb_dev->dev_addr.data[0]); break; case UWB_NOTIF_OFFAIR: - d_printf(6, dev, "UWB device %02x:%02x is offair\n", - uwb_dev->dev_addr.data[1], - uwb_dev->dev_addr.data[0]); wlp_eda_rm_node(&wlp->eda, &uwb_dev->dev_addr); mutex_lock(&wlp->nbmutex); - list_for_each_entry_safe(neighbor, next, &wlp->neighbors, - node) { - if (neighbor->uwb_dev == uwb_dev) { - d_printf(6, dev, "Removing device from " - "neighborhood.\n"); + list_for_each_entry_safe(neighbor, next, &wlp->neighbors, node) { + if (neighbor->uwb_dev == uwb_dev) __wlp_neighbor_release(neighbor); - } } mutex_unlock(&wlp->nbmutex); break; @@ -526,38 +494,47 @@ void wlp_uwb_notifs_cb(void *_wlp, struct uwb_dev *uwb_dev, } } -int wlp_setup(struct wlp *wlp, struct uwb_rc *rc) +static void wlp_channel_changed(struct uwb_pal *pal, int channel) +{ + struct wlp *wlp = container_of(pal, struct wlp, pal); + + if (channel < 0) + netif_carrier_off(wlp->ndev); + else + netif_carrier_on(wlp->ndev); +} + +int wlp_setup(struct wlp *wlp, struct uwb_rc *rc, struct net_device *ndev) { - struct device *dev = &rc->uwb_dev.dev; int result; - d_fnstart(6, dev, "wlp %p\n", wlp); BUG_ON(wlp->fill_device_info == NULL); BUG_ON(wlp->xmit_frame == NULL); BUG_ON(wlp->stop_queue == NULL); BUG_ON(wlp->start_queue == NULL); + wlp->rc = rc; + wlp->ndev = ndev; wlp_eda_init(&wlp->eda);/* Set up address cache */ wlp->uwb_notifs_handler.cb = wlp_uwb_notifs_cb; wlp->uwb_notifs_handler.data = wlp; uwb_notifs_register(rc, &wlp->uwb_notifs_handler); uwb_pal_init(&wlp->pal); - result = uwb_pal_register(rc, &wlp->pal); + wlp->pal.rc = rc; + wlp->pal.channel_changed = wlp_channel_changed; + result = uwb_pal_register(&wlp->pal); if (result < 0) uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler); - d_fnend(6, dev, "wlp %p, result = %d\n", wlp, result); return result; } EXPORT_SYMBOL_GPL(wlp_setup); void wlp_remove(struct wlp *wlp) { - struct device *dev = &wlp->rc->uwb_dev.dev; - d_fnstart(6, dev, "wlp %p\n", wlp); wlp_neighbors_release(wlp); - uwb_pal_unregister(wlp->rc, &wlp->pal); + uwb_pal_unregister(&wlp->pal); uwb_notifs_deregister(wlp->rc, &wlp->uwb_notifs_handler); wlp_eda_release(&wlp->eda); mutex_lock(&wlp->mutex); @@ -565,9 +542,6 @@ void wlp_remove(struct wlp *wlp) kfree(wlp->dev_info); mutex_unlock(&wlp->mutex); wlp->rc = NULL; - /* We have to use NULL here because this function can be called - * when the device disappeared. */ - d_fnend(6, NULL, "wlp %p\n", wlp); } EXPORT_SYMBOL_GPL(wlp_remove); diff --git a/drivers/uwb/wlp/wss-lc.c b/drivers/uwb/wlp/wss-lc.c index 96b18c9bd6e9..5913c7a5d922 100644 --- a/drivers/uwb/wlp/wss-lc.c +++ b/drivers/uwb/wlp/wss-lc.c @@ -43,14 +43,11 @@ * wlp_wss_release() * wlp_wss_reset() */ - #include <linux/etherdevice.h> /* for is_valid_ether_addr */ #include <linux/skbuff.h> #include <linux/wlp.h> -#define D_LOCAL 5 -#include <linux/uwb/debug.h> -#include "wlp-internal.h" +#include "wlp-internal.h" size_t wlp_wss_key_print(char *buf, size_t bufsize, u8 *key) { @@ -116,9 +113,6 @@ struct uwb_mac_addr wlp_wss_sel_bcast_addr(struct wlp_wss *wss) */ void wlp_wss_reset(struct wlp_wss *wss) { - struct wlp *wlp = container_of(wss, struct wlp, wss); - struct device *dev = &wlp->rc->uwb_dev.dev; - d_fnstart(5, dev, "wss (%p) \n", wss); memset(&wss->wssid, 0, sizeof(wss->wssid)); wss->hash = 0; memset(&wss->name[0], 0, sizeof(wss->name)); @@ -127,7 +121,6 @@ void wlp_wss_reset(struct wlp_wss *wss) memset(&wss->master_key[0], 0, sizeof(wss->master_key)); wss->tag = 0; wss->state = WLP_WSS_STATE_NONE; - d_fnend(5, dev, "wss (%p) \n", wss); } /** @@ -145,7 +138,6 @@ int wlp_wss_sysfs_add(struct wlp_wss *wss, char *wssid_str) struct device *dev = &wlp->rc->uwb_dev.dev; int result; - d_fnstart(5, dev, "wss (%p), wssid: %s\n", wss, wssid_str); result = kobject_set_name(&wss->kobj, "wss-%s", wssid_str); if (result < 0) return result; @@ -162,7 +154,6 @@ int wlp_wss_sysfs_add(struct wlp_wss *wss, char *wssid_str) result); goto error_sysfs_create_group; } - d_fnend(5, dev, "Completed. result = %d \n", result); return 0; error_sysfs_create_group: @@ -214,22 +205,14 @@ int wlp_wss_enroll_target(struct wlp_wss *wss, struct wlp_uuid *wssid, struct wlp *wlp = container_of(wss, struct wlp, wss); struct device *dev = &wlp->rc->uwb_dev.dev; struct wlp_neighbor_e *neighbor; - char buf[WLP_WSS_UUID_STRSIZE]; int result = -ENXIO; struct uwb_dev_addr *dev_addr; - wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_fnstart(5, dev, "wss %p, wssid %s, registrar %02x:%02x \n", - wss, buf, dest->data[1], dest->data[0]); mutex_lock(&wlp->nbmutex); list_for_each_entry(neighbor, &wlp->neighbors, node) { dev_addr = &neighbor->uwb_dev->dev_addr; if (!memcmp(dest, dev_addr, sizeof(*dest))) { - d_printf(5, dev, "Neighbor %02x:%02x is valid, " - "enrolling. \n", - dev_addr->data[1], dev_addr->data[0]); - result = wlp_enroll_neighbor(wlp, neighbor, wss, - wssid); + result = wlp_enroll_neighbor(wlp, neighbor, wss, wssid); break; } } @@ -237,8 +220,6 @@ int wlp_wss_enroll_target(struct wlp_wss *wss, struct wlp_uuid *wssid, dev_err(dev, "WLP: Cannot find neighbor %02x:%02x. \n", dest->data[1], dest->data[0]); mutex_unlock(&wlp->nbmutex); - d_fnend(5, dev, "wss %p, wssid %s, registrar %02x:%02x, result %d \n", - wss, buf, dest->data[1], dest->data[0], result); return result; } @@ -260,16 +241,11 @@ int wlp_wss_enroll_discovered(struct wlp_wss *wss, struct wlp_uuid *wssid) char buf[WLP_WSS_UUID_STRSIZE]; int result = -ENXIO; - wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_fnstart(5, dev, "wss %p, wssid %s \n", wss, buf); + mutex_lock(&wlp->nbmutex); list_for_each_entry(neighbor, &wlp->neighbors, node) { list_for_each_entry(wssid_e, &neighbor->wssid, node) { if (!memcmp(wssid, &wssid_e->wssid, sizeof(*wssid))) { - d_printf(5, dev, "Found WSSID %s in neighbor " - "%02x:%02x cache. \n", buf, - neighbor->uwb_dev->dev_addr.data[1], - neighbor->uwb_dev->dev_addr.data[0]); result = wlp_enroll_neighbor(wlp, neighbor, wss, wssid); if (result == 0) /* enrollment success */ @@ -279,10 +255,11 @@ int wlp_wss_enroll_discovered(struct wlp_wss *wss, struct wlp_uuid *wssid) } } out: - if (result == -ENXIO) + if (result == -ENXIO) { + wlp_wss_uuid_print(buf, sizeof(buf), wssid); dev_err(dev, "WLP: Cannot find WSSID %s in cache. \n", buf); + } mutex_unlock(&wlp->nbmutex); - d_fnend(5, dev, "wss %p, wssid %s, result %d \n", wss, buf, result); return result; } @@ -307,27 +284,22 @@ int wlp_wss_enroll(struct wlp_wss *wss, struct wlp_uuid *wssid, struct uwb_dev_addr bcast = {.data = {0xff, 0xff} }; wlp_wss_uuid_print(buf, sizeof(buf), wssid); + if (wss->state != WLP_WSS_STATE_NONE) { dev_err(dev, "WLP: Already enrolled in WSS %s.\n", buf); result = -EEXIST; goto error; } - if (!memcmp(&bcast, devaddr, sizeof(bcast))) { - d_printf(5, dev, "Request to enroll in discovered WSS " - "with WSSID %s \n", buf); + if (!memcmp(&bcast, devaddr, sizeof(bcast))) result = wlp_wss_enroll_discovered(wss, wssid); - } else { - d_printf(5, dev, "Request to enroll in WSSID %s with " - "registrar %02x:%02x\n", buf, devaddr->data[1], - devaddr->data[0]); + else result = wlp_wss_enroll_target(wss, wssid, devaddr); - } if (result < 0) { dev_err(dev, "WLP: Unable to enroll into WSS %s, result %d \n", buf, result); goto error; } - d_printf(2, dev, "Successfully enrolled into WSS %s \n", buf); + dev_dbg(dev, "Successfully enrolled into WSS %s \n", buf); result = wlp_wss_sysfs_add(wss, buf); if (result < 0) { dev_err(dev, "WLP: Unable to set up sysfs for WSS kobject.\n"); @@ -363,7 +335,6 @@ int wlp_wss_activate(struct wlp_wss *wss) u8 hash; /* only include one hash */ } ie_data; - d_fnstart(5, dev, "Activating WSS %p. \n", wss); BUG_ON(wss->state != WLP_WSS_STATE_ENROLLED); wss->hash = wlp_wss_comp_wssid_hash(&wss->wssid); wss->tag = wss->hash; @@ -382,7 +353,6 @@ int wlp_wss_activate(struct wlp_wss *wss) wss->state = WLP_WSS_STATE_ACTIVE; result = 0; error_wlp_ie: - d_fnend(5, dev, "Activating WSS %p, result = %d \n", wss, result); return result; } @@ -405,7 +375,6 @@ int wlp_wss_enroll_activate(struct wlp_wss *wss, struct wlp_uuid *wssid, int result = 0; char buf[WLP_WSS_UUID_STRSIZE]; - d_fnstart(5, dev, "Enrollment and activation requested. \n"); mutex_lock(&wss->mutex); result = wlp_wss_enroll(wss, wssid, devaddr); if (result < 0) { @@ -424,7 +393,6 @@ int wlp_wss_enroll_activate(struct wlp_wss *wss, struct wlp_uuid *wssid, error_activate: error_enroll: mutex_unlock(&wss->mutex); - d_fnend(5, dev, "Completed. result = %d \n", result); return result; } @@ -447,11 +415,9 @@ int wlp_wss_create_activate(struct wlp_wss *wss, struct wlp_uuid *wssid, struct device *dev = &wlp->rc->uwb_dev.dev; int result = 0; char buf[WLP_WSS_UUID_STRSIZE]; - d_fnstart(5, dev, "Request to create new WSS.\n"); + result = wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_printf(5, dev, "Request to create WSS: WSSID=%s, name=%s, " - "sec_status=%u, accepting enrollment=%u \n", - buf, name, sec_status, accept); + if (!mutex_trylock(&wss->mutex)) { dev_err(dev, "WLP: WLP association session in progress.\n"); return -EBUSY; @@ -498,7 +464,6 @@ int wlp_wss_create_activate(struct wlp_wss *wss, struct wlp_uuid *wssid, result = 0; out: mutex_unlock(&wss->mutex); - d_fnend(5, dev, "Completed. result = %d \n", result); return result; } @@ -520,16 +485,12 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss, { int result = 0; struct device *dev = &wlp->rc->uwb_dev.dev; - char buf[WLP_WSS_UUID_STRSIZE]; DECLARE_COMPLETION_ONSTACK(completion); struct wlp_session session; struct sk_buff *skb; struct wlp_frame_assoc *resp; struct wlp_uuid wssid; - wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid); - d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n", - wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]); mutex_lock(&wlp->mutex); /* Send C1 association frame */ result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C1); @@ -565,8 +526,6 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss, /* Parse message in session->data: it will be either C2 or F0 */ skb = session.data; resp = (void *) skb->data; - d_printf(5, dev, "Received response to C1 frame. \n"); - d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len); if (resp->type == WLP_ASSOC_F0) { result = wlp_parse_f0(wlp, skb); if (result < 0) @@ -584,11 +543,9 @@ int wlp_wss_is_active(struct wlp *wlp, struct wlp_wss *wss, result = 0; goto error_resp_parse; } - if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))) { - d_printf(5, dev, "WSSID in C2 frame matches local " - "active WSS.\n"); + if (!memcmp(&wssid, &wss->wssid, sizeof(wssid))) result = 1; - } else { + else { dev_err(dev, "WLP: Received a C2 frame without matching " "WSSID.\n"); result = 0; @@ -598,8 +555,6 @@ error_resp_parse: out: wlp->session = NULL; mutex_unlock(&wlp->mutex); - d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n", - wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]); return result; } @@ -620,16 +575,8 @@ int wlp_wss_activate_connection(struct wlp *wlp, struct wlp_wss *wss, { struct device *dev = &wlp->rc->uwb_dev.dev; int result = 0; - char buf[WLP_WSS_UUID_STRSIZE]; - wlp_wss_uuid_print(buf, sizeof(buf), wssid); - d_fnstart(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual " - "%02x:%02x:%02x:%02x:%02x:%02x \n", wlp, wss, buf, *tag, - virt_addr->data[0], virt_addr->data[1], virt_addr->data[2], - virt_addr->data[3], virt_addr->data[4], virt_addr->data[5]); if (!memcmp(wssid, &wss->wssid, sizeof(*wssid))) { - d_printf(5, dev, "WSSID from neighbor frame matches local " - "active WSS.\n"); /* Update EDA cache */ result = wlp_eda_update_node(&wlp->eda, dev_addr, wss, (void *) virt_addr->data, *tag, @@ -638,18 +585,9 @@ int wlp_wss_activate_connection(struct wlp *wlp, struct wlp_wss *wss, dev_err(dev, "WLP: Unable to update EDA cache " "with new connected neighbor information.\n"); } else { - dev_err(dev, "WLP: Neighbor does not have matching " - "WSSID.\n"); + dev_err(dev, "WLP: Neighbor does not have matching WSSID.\n"); result = -EINVAL; } - - d_fnend(5, dev, "wlp %p, wss %p, wssid %s, tag %u, virtual " - "%02x:%02x:%02x:%02x:%02x:%02x, result = %d \n", - wlp, wss, buf, *tag, - virt_addr->data[0], virt_addr->data[1], virt_addr->data[2], - virt_addr->data[3], virt_addr->data[4], virt_addr->data[5], - result); - return result; } @@ -665,7 +603,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss, { int result; struct device *dev = &wlp->rc->uwb_dev.dev; - char buf[WLP_WSS_UUID_STRSIZE]; struct wlp_uuid wssid; u8 tag; struct uwb_mac_addr virt_addr; @@ -674,9 +611,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss, struct wlp_frame_assoc *resp; struct sk_buff *skb; - wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid); - d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n", - wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]); mutex_lock(&wlp->mutex); /* Send C3 association frame */ result = wlp_send_assoc_frame(wlp, wss, dev_addr, WLP_ASSOC_C3); @@ -711,8 +645,6 @@ int wlp_wss_connect_neighbor(struct wlp *wlp, struct wlp_wss *wss, /* Parse message in session->data: it will be either C4 or F0 */ skb = session.data; resp = (void *) skb->data; - d_printf(5, dev, "Received response to C3 frame. \n"); - d_dump(5, dev, skb->data, skb->len > 72 ? 72 : skb->len); if (resp->type == WLP_ASSOC_F0) { result = wlp_parse_f0(wlp, skb); if (result < 0) @@ -744,8 +676,6 @@ out: WLP_WSS_CONNECT_FAILED); wlp->session = NULL; mutex_unlock(&wlp->mutex); - d_fnend(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n", - wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]); return result; } @@ -780,12 +710,8 @@ void wlp_wss_connect_send(struct work_struct *ws) struct wlp_wss *wss = &wlp->wss; int result; struct device *dev = &wlp->rc->uwb_dev.dev; - char buf[WLP_WSS_UUID_STRSIZE]; mutex_lock(&wss->mutex); - wlp_wss_uuid_print(buf, sizeof(buf), &wss->wssid); - d_fnstart(5, dev, "wlp %p, wss %p (wssid %s), neighbor %02x:%02x \n", - wlp, wss, buf, dev_addr->data[1], dev_addr->data[0]); if (wss->state < WLP_WSS_STATE_ACTIVE) { if (printk_ratelimit()) dev_err(dev, "WLP: Attempting to connect with " @@ -836,7 +762,6 @@ out: BUG_ON(wlp->start_queue == NULL); wlp->start_queue(wlp); mutex_unlock(&wss->mutex); - d_fnend(5, dev, "wlp %p, wss %p (wssid %s)\n", wlp, wss, buf); } /** @@ -855,7 +780,6 @@ int wlp_wss_prep_hdr(struct wlp *wlp, struct wlp_eda_node *eda_entry, struct sk_buff *skb = _skb; struct wlp_frame_std_abbrv_hdr *std_hdr; - d_fnstart(6, dev, "wlp %p \n", wlp); if (eda_entry->state == WLP_WSS_CONNECTED) { /* Add WLP header */ BUG_ON(skb_headroom(skb) < sizeof(*std_hdr)); @@ -873,7 +797,6 @@ int wlp_wss_prep_hdr(struct wlp *wlp, struct wlp_eda_node *eda_entry, dev_addr->data[0]); result = -EINVAL; } - d_fnend(6, dev, "wlp %p \n", wlp); return result; } @@ -893,16 +816,9 @@ int wlp_wss_connect_prep(struct wlp *wlp, struct wlp_eda_node *eda_entry, { int result = 0; struct device *dev = &wlp->rc->uwb_dev.dev; - struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr; - unsigned char *eth_addr = eda_entry->eth_addr; struct sk_buff *skb = _skb; struct wlp_assoc_conn_ctx *conn_ctx; - d_fnstart(5, dev, "wlp %p\n", wlp); - d_printf(5, dev, "To neighbor %02x:%02x with eth " - "%02x:%02x:%02x:%02x:%02x:%02x\n", dev_addr->data[1], - dev_addr->data[0], eth_addr[0], eth_addr[1], eth_addr[2], - eth_addr[3], eth_addr[4], eth_addr[5]); if (eda_entry->state == WLP_WSS_UNCONNECTED) { /* We don't want any more packets while we set up connection */ BUG_ON(wlp->stop_queue == NULL); @@ -929,12 +845,9 @@ int wlp_wss_connect_prep(struct wlp *wlp, struct wlp_eda_node *eda_entry, "previously. Not retrying. \n"); result = -ENONET; goto out; - } else { /* eda_entry->state == WLP_WSS_CONNECTED */ - d_printf(5, dev, "Neighbor is connected, preparing frame.\n"); + } else /* eda_entry->state == WLP_WSS_CONNECTED */ result = wlp_wss_prep_hdr(wlp, eda_entry, skb); - } out: - d_fnend(5, dev, "wlp %p, result = %d \n", wlp, result); return result; } @@ -957,8 +870,6 @@ int wlp_wss_send_copy(struct wlp *wlp, struct wlp_eda_node *eda_entry, struct sk_buff *copy; struct uwb_dev_addr *dev_addr = &eda_entry->dev_addr; - d_fnstart(5, dev, "to neighbor %02x:%02x, skb (%p) \n", - dev_addr->data[1], dev_addr->data[0], skb); copy = skb_copy(skb, GFP_ATOMIC); if (copy == NULL) { if (printk_ratelimit()) @@ -988,8 +899,6 @@ int wlp_wss_send_copy(struct wlp *wlp, struct wlp_eda_node *eda_entry, dev_kfree_skb_irq(copy);/*we need to free if tx fails */ } out: - d_fnend(5, dev, "to neighbor %02x:%02x \n", dev_addr->data[1], - dev_addr->data[0]); return result; } @@ -1005,7 +914,7 @@ int wlp_wss_setup(struct net_device *net_dev, struct wlp_wss *wss) struct wlp *wlp = container_of(wss, struct wlp, wss); struct device *dev = &wlp->rc->uwb_dev.dev; int result = 0; - d_fnstart(5, dev, "wss (%p) \n", wss); + mutex_lock(&wss->mutex); wss->kobj.parent = &net_dev->dev.kobj; if (!is_valid_ether_addr(net_dev->dev_addr)) { @@ -1018,7 +927,6 @@ int wlp_wss_setup(struct net_device *net_dev, struct wlp_wss *wss) sizeof(wss->virtual_addr.data)); out: mutex_unlock(&wss->mutex); - d_fnend(5, dev, "wss (%p) \n", wss); return result; } EXPORT_SYMBOL_GPL(wlp_wss_setup); @@ -1035,8 +943,7 @@ EXPORT_SYMBOL_GPL(wlp_wss_setup); void wlp_wss_remove(struct wlp_wss *wss) { struct wlp *wlp = container_of(wss, struct wlp, wss); - struct device *dev = &wlp->rc->uwb_dev.dev; - d_fnstart(5, dev, "wss (%p) \n", wss); + mutex_lock(&wss->mutex); if (wss->state == WLP_WSS_STATE_ACTIVE) uwb_rc_ie_rm(wlp->rc, UWB_IE_WLP); @@ -1050,6 +957,5 @@ void wlp_wss_remove(struct wlp_wss *wss) wlp_eda_release(&wlp->eda); wlp_eda_init(&wlp->eda); mutex_unlock(&wss->mutex); - d_fnend(5, dev, "wss (%p) \n", wss); } EXPORT_SYMBOL_GPL(wlp_wss_remove); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 4fd3fa5546b1..ec68c741b564 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -55,6 +55,13 @@ config SOFT_WATCHDOG To compile this driver as a module, choose M here: the module will be called softdog. +config WM8350_WATCHDOG + tristate "WM8350 watchdog" + depends on MFD_WM8350 + help + Support for the watchdog in the WM8350 AudioPlus PMIC. When + the watchdog triggers the system will be reset. + # ALPHA Architecture # ARM Architecture @@ -551,6 +558,18 @@ config CPU5_WDT To compile this driver as a module, choose M here: the module will be called cpu5wdt. +config SMSC_SCH311X_WDT + tristate "SMSC SCH311X Watchdog Timer" + depends on X86 + ---help--- + This is the driver for the hardware watchdog timer on the + SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset + (LPC IO with 8042 KBC, Reset Generation, HWM and multiple + serial ports). + + To compile this driver as a module, choose M here: the + module will be called sch311x_wdt. + config SMSC37B787_WDT tristate "Winbond SMsC37B787 Watchdog Timer" depends on X86 diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index e352bbb7630b..c19b866f5ed1 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_60XX_WDT) += sbc60xxwdt.o obj-$(CONFIG_SBC8360_WDT) += sbc8360.o obj-$(CONFIG_SBC7240_WDT) += sbc7240_wdt.o obj-$(CONFIG_CPU5_WDT) += cpu5wdt.o +obj-$(CONFIG_SMSC_SCH311X_WDT) += sch311x_wdt.o obj-$(CONFIG_SMSC37B787_WDT) += smsc37b787_wdt.o obj-$(CONFIG_W83627HF_WDT) += w83627hf_wdt.o obj-$(CONFIG_W83697HF_WDT) += w83697hf_wdt.o @@ -133,4 +134,5 @@ obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o # XTENSA Architecture # Architecture Independant +obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o obj-$(CONFIG_SOFT_WATCHDOG) += softdog.o diff --git a/drivers/watchdog/ib700wdt.c b/drivers/watchdog/ib700wdt.c index 317ef2b16cff..4bef3ddff4a5 100644 --- a/drivers/watchdog/ib700wdt.c +++ b/drivers/watchdog/ib700wdt.c @@ -91,32 +91,16 @@ static char expect_close; * */ -static int wd_times[] = { - 30, /* 0x0 */ - 28, /* 0x1 */ - 26, /* 0x2 */ - 24, /* 0x3 */ - 22, /* 0x4 */ - 20, /* 0x5 */ - 18, /* 0x6 */ - 16, /* 0x7 */ - 14, /* 0x8 */ - 12, /* 0x9 */ - 10, /* 0xA */ - 8, /* 0xB */ - 6, /* 0xC */ - 4, /* 0xD */ - 2, /* 0xE */ - 0, /* 0xF */ -}; - #define WDT_STOP 0x441 #define WDT_START 0x443 /* Default timeout */ -#define WD_TIMO 0 /* 30 seconds +/- 20%, from table */ - -static int wd_margin = WD_TIMO; +#define WATCHDOG_TIMEOUT 30 /* 30 seconds +/- 20% */ +static int timeout = WATCHDOG_TIMEOUT; /* in seconds */ +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, + "Watchdog timeout in seconds. 0<= timeout <=30, default=" + __MODULE_STRING(WATCHDOG_TIMEOUT) "."); static int nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, int, 0); @@ -131,6 +115,8 @@ MODULE_PARM_DESC(nowayout, static void ibwdt_ping(void) { + int wd_margin = 15 - ((timeout + 1) / 2); + spin_lock(&ibwdt_lock); /* Write a watchdog value */ @@ -148,15 +134,10 @@ static void ibwdt_disable(void) static int ibwdt_set_heartbeat(int t) { - int i; - - if ((t < 0) || (t > 30)) + if (t < 0 || t > 30) return -EINVAL; - for (i = 0x0F; i > -1; i--) - if (wd_times[i] >= t) - break; - wd_margin = i; + timeout = t; return 0; } @@ -240,7 +221,7 @@ static long ibwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* Fall */ case WDIOC_GETTIMEOUT: - return put_user(wd_times[wd_margin], p); + return put_user(timeout, p); default: return -ENOTTY; @@ -317,6 +298,14 @@ static int __devinit ibwdt_probe(struct platform_device *dev) goto out_nostartreg; } + /* Check that the heartbeat value is within it's range ; + * if not reset to the default */ + if (ibwdt_set_heartbeat(timeout)) { + ibwdt_set_heartbeat(WATCHDOG_TIMEOUT); + printk(KERN_INFO PFX + "timeout value must be 0<=x<=30, using %d\n", timeout); + } + res = misc_register(&ibwdt_miscdev); if (res) { printk(KERN_ERR PFX "failed to register misc device\n"); diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c new file mode 100644 index 000000000000..569eb295a7a8 --- /dev/null +++ b/drivers/watchdog/sch311x_wdt.c @@ -0,0 +1,578 @@ +/* + * sch311x_wdt.c - Driver for the SCH311x Super-I/O chips + * integrated watchdog. + * + * (c) Copyright 2008 Wim Van Sebroeck <wim@iguana.be>. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Wim Van Sebroeck nor Iguana vzw. admit liability nor + * provide warranty for any of this software. This material is + * provided "AS-IS" and at no charge. + */ + +/* + * Includes, defines, variables, module parameters, ... + */ + +/* Includes */ +#include <linux/module.h> /* For module specific items */ +#include <linux/moduleparam.h> /* For new moduleparam's */ +#include <linux/types.h> /* For standard types (like size_t) */ +#include <linux/errno.h> /* For the -ENODEV/... values */ +#include <linux/kernel.h> /* For printk/... */ +#include <linux/miscdevice.h> /* For MODULE_ALIAS_MISCDEV + (WATCHDOG_MINOR) */ +#include <linux/watchdog.h> /* For the watchdog specific items */ +#include <linux/init.h> /* For __init/__exit/... */ +#include <linux/fs.h> /* For file operations */ +#include <linux/platform_device.h> /* For platform_driver framework */ +#include <linux/ioport.h> /* For io-port access */ +#include <linux/spinlock.h> /* For spin_lock/spin_unlock/... */ +#include <linux/uaccess.h> /* For copy_to_user/put_user/... */ +#include <linux/io.h> /* For inb/outb/... */ + +/* Module and version information */ +#define DRV_NAME "sch311x_wdt" +#define PFX DRV_NAME ": " + +/* Runtime registers */ +#define RESGEN 0x1d +#define GP60 0x47 +#define WDT_TIME_OUT 0x65 +#define WDT_VAL 0x66 +#define WDT_CFG 0x67 +#define WDT_CTRL 0x68 + +/* internal variables */ +static unsigned long sch311x_wdt_is_open; +static char sch311x_wdt_expect_close; +static struct platform_device *sch311x_wdt_pdev; + +static int sch311x_ioports[] = { 0x2e, 0x4e, 0x162e, 0x164e, 0x00 }; + +static struct { /* The devices private data */ + /* the Runtime Register base address */ + unsigned short runtime_reg; + /* The card's boot status */ + int boot_status; + /* the lock for io operations */ + spinlock_t io_lock; +} sch311x_wdt_data; + +/* Module load parameters */ +static unsigned short force_id; +module_param(force_id, ushort, 0); +MODULE_PARM_DESC(force_id, "Override the detected device ID"); + +static unsigned short therm_trip; +module_param(therm_trip, ushort, 0); +MODULE_PARM_DESC(therm_trip, "Should a ThermTrip trigger the reset generator"); + +#define WATCHDOG_TIMEOUT 60 /* 60 sec default timeout */ +static int timeout = WATCHDOG_TIMEOUT; /* in seconds */ +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, + "Watchdog timeout in seconds. 1<= timeout <=15300, default=" + __MODULE_STRING(WATCHDOG_TIMEOUT) "."); + +static int nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, int, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +/* + * Super-IO functions + */ + +static inline void sch311x_sio_enter(int sio_config_port) +{ + outb(0x55, sio_config_port); +} + +static inline void sch311x_sio_exit(int sio_config_port) +{ + outb(0xaa, sio_config_port); +} + +static inline int sch311x_sio_inb(int sio_config_port, int reg) +{ + outb(reg, sio_config_port); + return inb(sio_config_port + 1); +} + +static inline void sch311x_sio_outb(int sio_config_port, int reg, int val) +{ + outb(reg, sio_config_port); + outb(val, sio_config_port + 1); +} + +/* + * Watchdog Operations + */ + +static void sch311x_wdt_set_timeout(int t) +{ + unsigned char timeout_unit = 0x80; + + /* When new timeout is bigger then 255 seconds, we will use minutes */ + if (t > 255) { + timeout_unit = 0; + t /= 60; + } + + /* -- Watchdog Timeout -- + * Bit 0-6 (Reserved) + * Bit 7 WDT Time-out Value Units Select + * (0 = Minutes, 1 = Seconds) + */ + outb(timeout_unit, sch311x_wdt_data.runtime_reg + WDT_TIME_OUT); + + /* -- Watchdog Timer Time-out Value -- + * Bit 0-7 Binary coded units (0=Disabled, 1..255) + */ + outb(t, sch311x_wdt_data.runtime_reg + WDT_VAL); +} + +static void sch311x_wdt_start(void) +{ + spin_lock(&sch311x_wdt_data.io_lock); + + /* set watchdog's timeout */ + sch311x_wdt_set_timeout(timeout); + /* enable the watchdog */ + /* -- General Purpose I/O Bit 6.0 -- + * Bit 0, In/Out: 0 = Output, 1 = Input + * Bit 1, Polarity: 0 = No Invert, 1 = Invert + * Bit 2-3, Function select: 00 = GPI/O, 01 = LED1, 11 = WDT, + * 10 = Either Edge Triggered Intr.4 + * Bit 4-6 (Reserved) + * Bit 7, Output Type: 0 = Push Pull Bit, 1 = Open Drain + */ + outb(0x0e, sch311x_wdt_data.runtime_reg + GP60); + + spin_unlock(&sch311x_wdt_data.io_lock); + +} + +static void sch311x_wdt_stop(void) +{ + spin_lock(&sch311x_wdt_data.io_lock); + + /* stop the watchdog */ + outb(0x01, sch311x_wdt_data.runtime_reg + GP60); + /* disable timeout by setting it to 0 */ + sch311x_wdt_set_timeout(0); + + spin_unlock(&sch311x_wdt_data.io_lock); +} + +static void sch311x_wdt_keepalive(void) +{ + spin_lock(&sch311x_wdt_data.io_lock); + sch311x_wdt_set_timeout(timeout); + spin_unlock(&sch311x_wdt_data.io_lock); +} + +static int sch311x_wdt_set_heartbeat(int t) +{ + if (t < 1 || t > (255*60)) + return -EINVAL; + + /* When new timeout is bigger then 255 seconds, + * we will round up to minutes (with a max of 255) */ + if (t > 255) + t = (((t - 1) / 60) + 1) * 60; + + timeout = t; + return 0; +} + +static void sch311x_wdt_get_status(int *status) +{ + unsigned char new_status; + + *status = 0; + + spin_lock(&sch311x_wdt_data.io_lock); + + /* -- Watchdog timer control -- + * Bit 0 Status Bit: 0 = Timer counting, 1 = Timeout occured + * Bit 1 Reserved + * Bit 2 Force Timeout: 1 = Forces WD timeout event (self-cleaning) + * Bit 3 P20 Force Timeout enabled: + * 0 = P20 activity does not generate the WD timeout event + * 1 = P20 Allows rising edge of P20, from the keyboard + * controller, to force the WD timeout event. + * Bit 4-7 Reserved + */ + new_status = inb(sch311x_wdt_data.runtime_reg + WDT_CTRL); + if (new_status & 0x01) + *status |= WDIOF_CARDRESET; + + spin_unlock(&sch311x_wdt_data.io_lock); +} + +/* + * /dev/watchdog handling + */ + +static ssize_t sch311x_wdt_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + if (count) { + if (!nowayout) { + size_t i; + + sch311x_wdt_expect_close = 0; + + for (i = 0; i != count; i++) { + char c; + if (get_user(c, buf + i)) + return -EFAULT; + if (c == 'V') + sch311x_wdt_expect_close = 42; + } + } + sch311x_wdt_keepalive(); + } + return count; +} + +static long sch311x_wdt_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int status; + int new_timeout; + void __user *argp = (void __user *)arg; + int __user *p = argp; + static struct watchdog_info ident = { + .options = WDIOF_KEEPALIVEPING | + WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE, + .firmware_version = 1, + .identity = DRV_NAME, + }; + + switch (cmd) { + case WDIOC_GETSUPPORT: + if (copy_to_user(argp, &ident, sizeof(ident))) + return -EFAULT; + break; + + case WDIOC_GETSTATUS: + { + sch311x_wdt_get_status(&status); + return put_user(status, p); + } + case WDIOC_GETBOOTSTATUS: + return put_user(sch311x_wdt_data.boot_status, p); + + case WDIOC_SETOPTIONS: + { + int options, retval = -EINVAL; + + if (get_user(options, p)) + return -EFAULT; + if (options & WDIOS_DISABLECARD) { + sch311x_wdt_stop(); + retval = 0; + } + if (options & WDIOS_ENABLECARD) { + sch311x_wdt_start(); + retval = 0; + } + return retval; + } + case WDIOC_KEEPALIVE: + sch311x_wdt_keepalive(); + break; + + case WDIOC_SETTIMEOUT: + if (get_user(new_timeout, p)) + return -EFAULT; + if (sch311x_wdt_set_heartbeat(new_timeout)) + return -EINVAL; + sch311x_wdt_keepalive(); + /* Fall */ + case WDIOC_GETTIMEOUT: + return put_user(timeout, p); + default: + return -ENOTTY; + } + return 0; +} + +static int sch311x_wdt_open(struct inode *inode, struct file *file) +{ + if (test_and_set_bit(0, &sch311x_wdt_is_open)) + return -EBUSY; + /* + * Activate + */ + sch311x_wdt_start(); + return nonseekable_open(inode, file); +} + +static int sch311x_wdt_close(struct inode *inode, struct file *file) +{ + if (sch311x_wdt_expect_close == 42) { + sch311x_wdt_stop(); + } else { + printk(KERN_CRIT PFX + "Unexpected close, not stopping watchdog!\n"); + sch311x_wdt_keepalive(); + } + clear_bit(0, &sch311x_wdt_is_open); + sch311x_wdt_expect_close = 0; + return 0; +} + +/* + * Kernel Interfaces + */ + +static const struct file_operations sch311x_wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .write = sch311x_wdt_write, + .unlocked_ioctl = sch311x_wdt_ioctl, + .open = sch311x_wdt_open, + .release = sch311x_wdt_close, +}; + +static struct miscdevice sch311x_wdt_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &sch311x_wdt_fops, +}; + +/* + * Init & exit routines + */ + +static int __devinit sch311x_wdt_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + unsigned char val; + int err; + + spin_lock_init(&sch311x_wdt_data.io_lock); + + if (!request_region(sch311x_wdt_data.runtime_reg + RESGEN, 1, + DRV_NAME)) { + dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n", + sch311x_wdt_data.runtime_reg + RESGEN, + sch311x_wdt_data.runtime_reg + RESGEN); + err = -EBUSY; + goto exit; + } + + if (!request_region(sch311x_wdt_data.runtime_reg + GP60, 1, DRV_NAME)) { + dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n", + sch311x_wdt_data.runtime_reg + GP60, + sch311x_wdt_data.runtime_reg + GP60); + err = -EBUSY; + goto exit_release_region; + } + + if (!request_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4, + DRV_NAME)) { + dev_err(dev, "Failed to request region 0x%04x-0x%04x.\n", + sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, + sch311x_wdt_data.runtime_reg + WDT_CTRL); + err = -EBUSY; + goto exit_release_region2; + } + + /* Make sure that the watchdog is not running */ + sch311x_wdt_stop(); + + /* Disable keyboard and mouse interaction and interrupt */ + /* -- Watchdog timer configuration -- + * Bit 0 Reserved + * Bit 1 Keyboard enable: 0* = No Reset, 1 = Reset WDT upon KBD Intr. + * Bit 2 Mouse enable: 0* = No Reset, 1 = Reset WDT upon Mouse Intr + * Bit 3 Reserved + * Bit 4-7 WDT Interrupt Mapping: (0000* = Disabled, + * 0001=IRQ1, 0010=(Invalid), 0011=IRQ3 to 1111=IRQ15) + */ + outb(0, sch311x_wdt_data.runtime_reg + WDT_CFG); + + /* Check that the heartbeat value is within it's range ; + * if not reset to the default */ + if (sch311x_wdt_set_heartbeat(timeout)) { + sch311x_wdt_set_heartbeat(WATCHDOG_TIMEOUT); + dev_info(dev, "timeout value must be 1<=x<=15300, using %d\n", + timeout); + } + + /* Get status at boot */ + sch311x_wdt_get_status(&sch311x_wdt_data.boot_status); + + /* enable watchdog */ + /* -- Reset Generator -- + * Bit 0 Enable Watchdog Timer Generation: 0* = Enabled, 1 = Disabled + * Bit 1 Thermtrip Source Select: O* = No Source, 1 = Source + * Bit 2 WDT2_CTL: WDT input bit + * Bit 3-7 Reserved + */ + outb(0, sch311x_wdt_data.runtime_reg + RESGEN); + val = therm_trip ? 0x06 : 0x04; + outb(val, sch311x_wdt_data.runtime_reg + RESGEN); + + err = misc_register(&sch311x_wdt_miscdev); + if (err != 0) { + dev_err(dev, "cannot register miscdev on minor=%d (err=%d)\n", + WATCHDOG_MINOR, err); + goto exit_release_region3; + } + + sch311x_wdt_miscdev.parent = dev; + + dev_info(dev, + "SMSC SCH311x WDT initialized. timeout=%d sec (nowayout=%d)\n", + timeout, nowayout); + + return 0; + +exit_release_region3: + release_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4); +exit_release_region2: + release_region(sch311x_wdt_data.runtime_reg + GP60, 1); +exit_release_region: + release_region(sch311x_wdt_data.runtime_reg + RESGEN, 1); + sch311x_wdt_data.runtime_reg = 0; +exit: + return err; +} + +static int __devexit sch311x_wdt_remove(struct platform_device *pdev) +{ + /* Stop the timer before we leave */ + if (!nowayout) + sch311x_wdt_stop(); + + /* Deregister */ + misc_deregister(&sch311x_wdt_miscdev); + release_region(sch311x_wdt_data.runtime_reg + WDT_TIME_OUT, 4); + release_region(sch311x_wdt_data.runtime_reg + GP60, 1); + release_region(sch311x_wdt_data.runtime_reg + RESGEN, 1); + sch311x_wdt_data.runtime_reg = 0; + return 0; +} + +static void sch311x_wdt_shutdown(struct platform_device *dev) +{ + /* Turn the WDT off if we have a soft shutdown */ + sch311x_wdt_stop(); +} + +#define sch311x_wdt_suspend NULL +#define sch311x_wdt_resume NULL + +static struct platform_driver sch311x_wdt_driver = { + .probe = sch311x_wdt_probe, + .remove = __devexit_p(sch311x_wdt_remove), + .shutdown = sch311x_wdt_shutdown, + .suspend = sch311x_wdt_suspend, + .resume = sch311x_wdt_resume, + .driver = { + .owner = THIS_MODULE, + .name = DRV_NAME, + }, +}; + +static int __init sch311x_detect(int sio_config_port, unsigned short *addr) +{ + int err = 0, reg; + unsigned short base_addr; + unsigned char dev_id; + + sch311x_sio_enter(sio_config_port); + + /* Check device ID. We currently know about: + * SCH3112 (0x7c), SCH3114 (0x7d), and SCH3116 (0x7f). */ + reg = force_id ? force_id : sch311x_sio_inb(sio_config_port, 0x20); + if (!(reg == 0x7c || reg == 0x7d || reg == 0x7f)) { + err = -ENODEV; + goto exit; + } + dev_id = reg == 0x7c ? 2 : reg == 0x7d ? 4 : 6; + + /* Select logical device A (runtime registers) */ + sch311x_sio_outb(sio_config_port, 0x07, 0x0a); + + /* Check if Logical Device Register is currently active */ + if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0) + printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n"); + + /* Get the base address of the runtime registers */ + base_addr = (sch311x_sio_inb(sio_config_port, 0x60) << 8) | + sch311x_sio_inb(sio_config_port, 0x61); + if (!base_addr) { + printk(KERN_ERR PFX "Base address not set.\n"); + err = -ENODEV; + goto exit; + } + *addr = base_addr; + + printk(KERN_INFO PFX "Found an SMSC SCH311%d chip at 0x%04x\n", + dev_id, base_addr); + +exit: + sch311x_sio_exit(sio_config_port); + return err; +} + +static int __init sch311x_wdt_init(void) +{ + int err, i, found = 0; + unsigned short addr = 0; + + for (i = 0; !found && sch311x_ioports[i]; i++) + if (sch311x_detect(sch311x_ioports[i], &addr) == 0) + found++; + + if (!found) + return -ENODEV; + + sch311x_wdt_data.runtime_reg = addr; + + err = platform_driver_register(&sch311x_wdt_driver); + if (err) + return err; + + sch311x_wdt_pdev = platform_device_register_simple(DRV_NAME, addr, + NULL, 0); + + if (IS_ERR(sch311x_wdt_pdev)) { + err = PTR_ERR(sch311x_wdt_pdev); + goto unreg_platform_driver; + } + + return 0; + +unreg_platform_driver: + platform_driver_unregister(&sch311x_wdt_driver); + return err; +} + +static void __exit sch311x_wdt_exit(void) +{ + platform_device_unregister(sch311x_wdt_pdev); + platform_driver_unregister(&sch311x_wdt_driver); +} + +module_init(sch311x_wdt_init); +module_exit(sch311x_wdt_exit); + +MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>"); +MODULE_DESCRIPTION("SMSC SCH311x WatchDog Timer Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); + diff --git a/drivers/watchdog/wm8350_wdt.c b/drivers/watchdog/wm8350_wdt.c new file mode 100644 index 000000000000..2bc0d4d4b415 --- /dev/null +++ b/drivers/watchdog/wm8350_wdt.c @@ -0,0 +1,329 @@ +/* + * Watchdog driver for the wm8350 + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics <linux@wolfsonmicro.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/platform_device.h> +#include <linux/watchdog.h> +#include <linux/uaccess.h> +#include <linux/mfd/wm8350/core.h> + +static int nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, int, 0); +MODULE_PARM_DESC(nowayout, + "Watchdog cannot be stopped once started (default=" + __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +static unsigned long wm8350_wdt_users; +static struct miscdevice wm8350_wdt_miscdev; +static int wm8350_wdt_expect_close; +static DEFINE_MUTEX(wdt_mutex); + +static struct { + int time; /* Seconds */ + u16 val; /* To be set in WM8350_SYSTEM_CONTROL_2 */ +} wm8350_wdt_cfgs[] = { + { 1, 0x02 }, + { 2, 0x04 }, + { 4, 0x05 }, +}; + +static struct wm8350 *get_wm8350(void) +{ + return dev_get_drvdata(wm8350_wdt_miscdev.parent); +} + +static int wm8350_wdt_set_timeout(struct wm8350 *wm8350, u16 value) +{ + int ret; + u16 reg; + + mutex_lock(&wdt_mutex); + wm8350_reg_unlock(wm8350); + + reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2); + reg &= ~WM8350_WDOG_TO_MASK; + reg |= value; + ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg); + + wm8350_reg_lock(wm8350); + mutex_unlock(&wdt_mutex); + + return ret; +} + +static int wm8350_wdt_start(struct wm8350 *wm8350) +{ + int ret; + u16 reg; + + mutex_lock(&wdt_mutex); + wm8350_reg_unlock(wm8350); + + reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2); + reg &= ~WM8350_WDOG_MODE_MASK; + reg |= 0x20; + ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg); + + wm8350_reg_lock(wm8350); + mutex_unlock(&wdt_mutex); + + return ret; +} + +static int wm8350_wdt_stop(struct wm8350 *wm8350) +{ + int ret; + u16 reg; + + mutex_lock(&wdt_mutex); + wm8350_reg_unlock(wm8350); + + reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2); + reg &= ~WM8350_WDOG_MODE_MASK; + ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg); + + wm8350_reg_lock(wm8350); + mutex_unlock(&wdt_mutex); + + return ret; +} + +static int wm8350_wdt_kick(struct wm8350 *wm8350) +{ + int ret; + u16 reg; + + mutex_lock(&wdt_mutex); + + reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2); + ret = wm8350_reg_write(wm8350, WM8350_SYSTEM_CONTROL_2, reg); + + mutex_unlock(&wdt_mutex); + + return ret; +} + +static int wm8350_wdt_open(struct inode *inode, struct file *file) +{ + struct wm8350 *wm8350 = get_wm8350(); + int ret; + + if (!wm8350) + return -ENODEV; + + if (test_and_set_bit(0, &wm8350_wdt_users)) + return -EBUSY; + + ret = wm8350_wdt_start(wm8350); + if (ret != 0) + return ret; + + return nonseekable_open(inode, file); +} + +static int wm8350_wdt_release(struct inode *inode, struct file *file) +{ + struct wm8350 *wm8350 = get_wm8350(); + + if (wm8350_wdt_expect_close) + wm8350_wdt_stop(wm8350); + else { + dev_warn(wm8350->dev, "Watchdog device closed uncleanly\n"); + wm8350_wdt_kick(wm8350); + } + + clear_bit(0, &wm8350_wdt_users); + + return 0; +} + +static ssize_t wm8350_wdt_write(struct file *file, + const char __user *data, size_t count, + loff_t *ppos) +{ + struct wm8350 *wm8350 = get_wm8350(); + size_t i; + + if (count) { + wm8350_wdt_kick(wm8350); + + if (!nowayout) { + /* In case it was set long ago */ + wm8350_wdt_expect_close = 0; + + /* scan to see whether or not we got the magic + character */ + for (i = 0; i != count; i++) { + char c; + if (get_user(c, data + i)) + return -EFAULT; + if (c == 'V') + wm8350_wdt_expect_close = 42; + } + } + } + return count; +} + +static struct watchdog_info ident = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, + .identity = "WM8350 Watchdog", +}; + +static long wm8350_wdt_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct wm8350 *wm8350 = get_wm8350(); + int ret = -ENOTTY, time, i; + void __user *argp = (void __user *)arg; + int __user *p = argp; + u16 reg; + + switch (cmd) { + case WDIOC_GETSUPPORT: + ret = copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; + break; + + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + ret = put_user(0, p); + break; + + case WDIOC_SETOPTIONS: + { + int options; + + if (get_user(options, p)) + return -EFAULT; + + ret = -EINVAL; + + /* Setting both simultaneously means at least one must fail */ + if (options == WDIOS_DISABLECARD) + ret = wm8350_wdt_start(wm8350); + + if (options == WDIOS_ENABLECARD) + ret = wm8350_wdt_stop(wm8350); + break; + } + + case WDIOC_KEEPALIVE: + ret = wm8350_wdt_kick(wm8350); + break; + + case WDIOC_SETTIMEOUT: + ret = get_user(time, p); + if (ret) + break; + + if (time == 0) { + if (nowayout) + ret = -EINVAL; + else + wm8350_wdt_stop(wm8350); + break; + } + + for (i = 0; i < ARRAY_SIZE(wm8350_wdt_cfgs); i++) + if (wm8350_wdt_cfgs[i].time == time) + break; + if (i == ARRAY_SIZE(wm8350_wdt_cfgs)) + ret = -EINVAL; + else + ret = wm8350_wdt_set_timeout(wm8350, + wm8350_wdt_cfgs[i].val); + break; + + case WDIOC_GETTIMEOUT: + reg = wm8350_reg_read(wm8350, WM8350_SYSTEM_CONTROL_2); + reg &= WM8350_WDOG_TO_MASK; + for (i = 0; i < ARRAY_SIZE(wm8350_wdt_cfgs); i++) + if (wm8350_wdt_cfgs[i].val == reg) + break; + if (i == ARRAY_SIZE(wm8350_wdt_cfgs)) { + dev_warn(wm8350->dev, + "Unknown watchdog configuration: %x\n", reg); + ret = -EINVAL; + } else + ret = put_user(wm8350_wdt_cfgs[i].time, p); + + } + + return ret; +} + +static const struct file_operations wm8350_wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .write = wm8350_wdt_write, + .unlocked_ioctl = wm8350_wdt_ioctl, + .open = wm8350_wdt_open, + .release = wm8350_wdt_release, +}; + +static struct miscdevice wm8350_wdt_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &wm8350_wdt_fops, +}; + +static int wm8350_wdt_probe(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = platform_get_drvdata(pdev); + + if (!wm8350) { + dev_err(wm8350->dev, "No driver data supplied\n"); + return -ENODEV; + } + + /* Default to 4s timeout */ + wm8350_wdt_set_timeout(wm8350, 0x05); + + wm8350_wdt_miscdev.parent = &pdev->dev; + + return misc_register(&wm8350_wdt_miscdev); +} + +static int __exit wm8350_wdt_remove(struct platform_device *pdev) +{ + misc_deregister(&wm8350_wdt_miscdev); + + return 0; +} + +static struct platform_driver wm8350_wdt_driver = { + .probe = wm8350_wdt_probe, + .remove = wm8350_wdt_remove, + .driver = { + .name = "wm8350-wdt", + }, +}; + +static int __init wm8350_wdt_init(void) +{ + return platform_driver_register(&wm8350_wdt_driver); +} +module_init(wm8350_wdt_init); + +static void __exit wm8350_wdt_exit(void) +{ + platform_driver_unregister(&wm8350_wdt_driver); +} +module_exit(wm8350_wdt_exit); + +MODULE_AUTHOR("Mark Brown"); +MODULE_DESCRIPTION("WM8350 Watchdog"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:wm8350-wdt"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 46625cd38743..eb0dfdeaa949 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -142,9 +142,6 @@ static void init_evtchn_cpu_bindings(void) /* By default all event channels notify CPU#0. */ for_each_irq_desc(i, desc) { - if (!desc) - continue; - desc->affinity = cpumask_of_cpu(0); } #endif @@ -588,7 +585,7 @@ void rebind_evtchn_irq(int evtchn, int irq) spin_unlock(&irq_mapping_update_lock); /* new event channels are always bound to cpu 0 */ - irq_set_affinity(irq, cpumask_of_cpu(0)); + irq_set_affinity(irq, cpumask_of(0)); /* Unmask the event channel. */ enable_irq(irq); @@ -617,9 +614,9 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) } -static void set_affinity_irq(unsigned irq, cpumask_t dest) +static void set_affinity_irq(unsigned irq, const struct cpumask *dest) { - unsigned tcpu = first_cpu(dest); + unsigned tcpu = cpumask_first(dest); rebind_irq_to_cpu(irq, tcpu); } diff --git a/fs/Kconfig b/fs/Kconfig index 522469a7eca3..ff0e81980207 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -270,44 +270,7 @@ config OCFS2_COMPAT_JBD endif # BLOCK -config DNOTIFY - bool "Dnotify support" - default y - help - Dnotify is a directory-based per-fd file change notification system - that uses signals to communicate events to user-space. There exist - superior alternatives, but some applications may still rely on - dnotify. - - If unsure, say Y. - -config INOTIFY - bool "Inotify file change notification support" - default y - ---help--- - Say Y here to enable inotify support. Inotify is a file change - notification system and a replacement for dnotify. Inotify fixes - numerous shortcomings in dnotify and introduces several new features - including multiple file events, one-shot support, and unmount - notification. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. - -config INOTIFY_USER - bool "Inotify support for userspace" - depends on INOTIFY - default y - ---help--- - Say Y here to enable inotify support for userspace, including the - associated system calls. Inotify allows monitoring of both files and - directories via a single open fd. Events are read from the file - descriptor, which is also select()- and poll()-able. - - For more information, see <file:Documentation/filesystems/inotify.txt> - - If unsure, say Y. +source "fs/notify/Kconfig" config QUOTA bool "Quota support" diff --git a/fs/Makefile b/fs/Makefile index d9f8afe6f0c4..e6f423d1d228 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -20,8 +20,7 @@ obj-y += no-block.o endif obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o -obj-$(CONFIG_INOTIFY) += inotify.o -obj-$(CONFIG_INOTIFY_USER) += inotify_user.o +obj-y += notify/ obj-$(CONFIG_EPOLL) += eventpoll.o obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o @@ -57,8 +56,6 @@ obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o obj-$(CONFIG_QUOTACTL) += quota.o -obj-$(CONFIG_DNOTIFY) += dnotify.o - obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ obj-$(CONFIG_SYSFS) += sysfs/ diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index c16d9be1b017..3bbdb9d02376 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops, if (IS_ERR(anon_inode_inode)) return -ENODEV; + if (fops->owner && !try_module_get(fops->owner)) + return -ENOENT; + error = get_unused_fd_flags(flags); if (error < 0) - return error; + goto err_module; fd = error; /* @@ -128,6 +131,8 @@ err_dput: dput(dentry); err_put_unused_fd: put_unused_fd(fd); +err_module: + module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f1538c03b1b..a05287a23f62 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -132,11 +132,6 @@ static int bad_file_check_flags(int flags) return -EIO; } -static int bad_file_dir_notify(struct file *file, unsigned long arg) -{ - return -EIO; -} - static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl) { return -EIO; @@ -179,7 +174,6 @@ static const struct file_operations bad_file_ops = .sendpage = bad_file_sendpage, .get_unmapped_area = bad_file_get_unmapped_area, .check_flags = bad_file_check_flags, - .dir_notify = bad_file_dir_notify, .flock = bad_file_flock, .splice_write = bad_file_splice_write, .splice_read = bad_file_splice_read, diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index b6dfee37c7b7..d06cb023ad02 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -378,7 +378,8 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_size = 0; inode->i_blocks = befs_sb->block_size / VFS_BLOCK_SIZE; strncpy(befs_ino->i_data.symlink, raw_inode->data.symlink, - BEFS_SYMLINK_LEN); + BEFS_SYMLINK_LEN - 1); + befs_ino->i_data.symlink[BEFS_SYMLINK_LEN - 1] = '\0'; } else { int num_blks; @@ -477,6 +478,8 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) kfree(link); befs_error(sb, "Failed to read entire long symlink"); link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; } } else { link = befs_ino->i_data.symlink; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f1f3f4192a60..b639dcf7c778 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -95,92 +95,55 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u int has_dumped = 0; unsigned long dump_start, dump_size; struct user dump; -#if defined(__alpha__) +#ifdef __alpha__ # define START_DATA(u) (u.start_data) -#elif defined(__arm__) +#else # define START_DATA(u) ((u.u_tsize << PAGE_SHIFT) + u.start_code) -#elif defined(__sparc__) -# define START_DATA(u) (u.u_tsize) -#elif defined(__i386__) || defined(__mc68000__) || defined(__arch_um__) -# define START_DATA(u) (u.u_tsize << PAGE_SHIFT) #endif -#ifdef __sparc__ -# define START_STACK(u) ((regs->u_regs[UREG_FP]) & ~(PAGE_SIZE - 1)) -#else # define START_STACK(u) (u.start_stack) -#endif fs = get_fs(); set_fs(KERNEL_DS); has_dumped = 1; current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); -#ifndef __sparc__ dump.u_ar0 = offsetof(struct user, regs); -#endif dump.signal = signr; aout_dump_thread(regs, &dump); /* If the size of the dump file exceeds the rlimit, then see what would happen if we wrote the stack, but not the data area. */ -#ifdef __sparc__ - if ((dump.u_dsize + dump.u_ssize) > limit) - dump.u_dsize = 0; -#else if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit) dump.u_dsize = 0; -#endif /* Make sure we have enough room to write the stack and data areas. */ -#ifdef __sparc__ - if (dump.u_ssize > limit) - dump.u_ssize = 0; -#else if ((dump.u_ssize + 1) * PAGE_SIZE > limit) dump.u_ssize = 0; -#endif /* make sure we actually have a data and stack area to dump */ set_fs(USER_DS); -#ifdef __sparc__ - if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize)) - dump.u_dsize = 0; - if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize)) - dump.u_ssize = 0; -#else if (!access_ok(VERIFY_READ, (void __user *)START_DATA(dump), dump.u_dsize << PAGE_SHIFT)) dump.u_dsize = 0; if (!access_ok(VERIFY_READ, (void __user *)START_STACK(dump), dump.u_ssize << PAGE_SHIFT)) dump.u_ssize = 0; -#endif set_fs(KERNEL_DS); /* struct user */ DUMP_WRITE(&dump,sizeof(dump)); /* Now dump all of the user data. Include malloced stuff as well */ -#ifndef __sparc__ DUMP_SEEK(PAGE_SIZE); -#endif /* now we start writing out the user space info */ set_fs(USER_DS); /* Dump the data area */ if (dump.u_dsize != 0) { dump_start = START_DATA(dump); -#ifdef __sparc__ - dump_size = dump.u_dsize; -#else dump_size = dump.u_dsize << PAGE_SHIFT; -#endif DUMP_WRITE(dump_start,dump_size); } /* Now prepare to dump the stack area */ if (dump.u_ssize != 0) { dump_start = START_STACK(dump); -#ifdef __sparc__ - dump_size = dump.u_ssize; -#else dump_size = dump.u_ssize << PAGE_SHIFT; -#endif DUMP_WRITE(dump_start,dump_size); } /* Finally dump the task struct. Not be used by gdb, but could be useful */ @@ -205,29 +168,24 @@ static unsigned long __user *create_aout_tables(char __user *p, struct linux_bin int envc = bprm->envc; sp = (void __user *)((-(unsigned long)sizeof(char *)) & (unsigned long) p); -#ifdef __sparc__ - /* This imposes the proper stack alignment for a new process. */ - sp = (void __user *) (((unsigned long) sp) & ~7); - if ((envc+argc+3)&1) --sp; -#endif #ifdef __alpha__ /* whee.. test-programs are so much fun. */ put_user(0, --sp); put_user(0, --sp); if (bprm->loader) { put_user(0, --sp); - put_user(0x3eb, --sp); + put_user(1003, --sp); put_user(bprm->loader, --sp); - put_user(0x3ea, --sp); + put_user(1002, --sp); } put_user(bprm->exec, --sp); - put_user(0x3e9, --sp); + put_user(1001, --sp); #endif sp -= envc+1; envp = (char __user * __user *) sp; sp -= argc+1; argv = (char __user * __user *) sp; -#if defined(__i386__) || defined(__mc68000__) || defined(__arm__) || defined(__arch_um__) +#ifndef __alpha__ put_user((unsigned long) envp,--sp); put_user((unsigned long) argv,--sp); #endif @@ -300,13 +258,8 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) return retval; /* OK, This is the point of no return */ -#if defined(__alpha__) +#ifdef __alpha__ SET_AOUT_PERSONALITY(bprm, ex); -#elif defined(__sparc__) - set_personality(PER_SUNOS); -#if !defined(__sparc_v9__) - memcpy(¤t->thread.core_exec, &ex, sizeof(struct exec)); -#endif #else set_personality(PER_LINUX); #endif @@ -322,24 +275,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; -#ifdef __sparc__ - if (N_MAGIC(ex) == NMAGIC) { - loff_t pos = fd_offset; - /* Fuck me plenty... */ - /* <AOL></AOL> */ - down_write(¤t->mm->mmap_sem); - error = do_brk(N_TXTADDR(ex), ex.a_text); - up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_TXTADDR(ex), - ex.a_text, &pos); - down_write(¤t->mm->mmap_sem); - error = do_brk(N_DATADDR(ex), ex.a_data); - up_write(¤t->mm->mmap_sem); - bprm->file->f_op->read(bprm->file, (char *) N_DATADDR(ex), - ex.a_data, &pos); - goto beyond_if; - } -#endif if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; @@ -347,7 +282,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) text_addr = N_TXTADDR(ex); -#if defined(__alpha__) || defined(__sparc__) +#ifdef __alpha__ pos = fd_offset; map_size = ex.a_text+ex.a_data + PAGE_SIZE - 1; #else diff --git a/fs/block_dev.c b/fs/block_dev.c index 99e0ae1a4c78..349a26c10001 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -326,12 +326,13 @@ static struct file_system_type bd_type = { .kill_sb = kill_anon_super, }; -static struct vfsmount *bd_mnt __read_mostly; -struct super_block *blockdev_superblock; +struct super_block *blockdev_superblock __read_mostly; void __init bdev_cache_init(void) { int err; + struct vfsmount *bd_mnt; + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), @@ -373,7 +374,7 @@ struct block_device *bdget(dev_t dev) struct block_device *bdev; struct inode *inode; - inode = iget5_locked(bd_mnt->mnt_sb, hash(dev), + inode = iget5_locked(blockdev_superblock, hash(dev), bdev_test, bdev_set, &dev); if (!inode) @@ -463,7 +464,7 @@ void bd_forget(struct inode *inode) spin_lock(&bdev_lock); if (inode->i_bdev) { - if (inode->i_sb != blockdev_superblock) + if (!sb_is_blkdev_sb(inode->i_sb)) bdev = inode->i_bdev; __bd_forget(inode); } diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 6ba43fb346fb..9948c0030e86 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_CIFS) += cifs.o cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ - md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o \ + md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o cifsacl.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0005a194a75c..13ea53251dcf 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -747,7 +747,6 @@ const struct file_operations cifs_file_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -768,7 +767,6 @@ const struct file_operations cifs_file_direct_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -789,7 +787,6 @@ const struct file_operations cifs_file_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -809,7 +806,6 @@ const struct file_operations cifs_file_direct_nobrl_ops = { #endif /* CONFIG_CIFS_POSIX */ .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -818,9 +814,6 @@ const struct file_operations cifs_dir_ops = { .readdir = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, -#ifdef CONFIG_CIFS_EXPERIMENTAL - .dir_notify = cifs_dir_notify, -#endif /* CONFIG_CIFS_EXPERIMENTAL */ .unlocked_ioctl = cifs_ioctl, .llseek = generic_file_llseek, }; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 2ce04c73d74e..7ac481841f87 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -76,7 +76,6 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); extern const struct file_operations cifs_dir_ops; extern int cifs_dir_open(struct inode *inode, struct file *file); extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); -extern int cifs_dir_notify(struct file *, unsigned long arg); /* Functions related to dir entries */ extern struct dentry_operations cifs_dentry_ops; diff --git a/fs/cifs/fcntl.c b/fs/cifs/fcntl.c deleted file mode 100644 index 5a57581eb4b2..000000000000 --- a/fs/cifs/fcntl.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * fs/cifs/fcntl.c - * - * vfs operations that deal with the file control API - * - * Copyright (C) International Business Machines Corp., 2003,2004 - * Author(s): Steve French (sfrench@us.ibm.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/fcntl.h> -#include "cifsglob.h" -#include "cifsproto.h" -#include "cifs_unicode.h" -#include "cifs_debug.h" -#include "cifsfs.h" - -static __u32 convert_to_cifs_notify_flags(unsigned long fcntl_notify_flags) -{ - __u32 cifs_ntfy_flags = 0; - - /* No way on Linux VFS to ask to monitor xattr - changes (and no stream support either */ - if (fcntl_notify_flags & DN_ACCESS) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_ACCESS; - if (fcntl_notify_flags & DN_MODIFY) { - /* What does this mean on directories? */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE | - FILE_NOTIFY_CHANGE_SIZE; - } - if (fcntl_notify_flags & DN_CREATE) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_CREATION | - FILE_NOTIFY_CHANGE_LAST_WRITE; - } - if (fcntl_notify_flags & DN_DELETE) - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_LAST_WRITE; - if (fcntl_notify_flags & DN_RENAME) { - /* BB review this - checking various server behaviors */ - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_DIR_NAME | - FILE_NOTIFY_CHANGE_FILE_NAME; - } - if (fcntl_notify_flags & DN_ATTRIB) { - cifs_ntfy_flags |= FILE_NOTIFY_CHANGE_SECURITY | - FILE_NOTIFY_CHANGE_ATTRIBUTES; - } -/* if (fcntl_notify_flags & DN_MULTISHOT) { - cifs_ntfy_flags |= ; - } */ /* BB fixme - not sure how to handle this with CIFS yet */ - - return cifs_ntfy_flags; -} - -int cifs_dir_notify(struct file *file, unsigned long arg) -{ - int xid; - int rc = -EINVAL; - int oplock = 0; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - char *full_path = NULL; - __u32 filter = FILE_NOTIFY_CHANGE_NAME | FILE_NOTIFY_CHANGE_ATTRIBUTES; - __u16 netfid; - - if (experimEnabled == 0) - return 0; - - xid = GetXid(); - cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); - pTcon = cifs_sb->tcon; - - full_path = build_path_from_dentry(file->f_path.dentry); - - if (full_path == NULL) { - rc = -ENOMEM; - } else { - cFYI(1, ("dir notify on file %s Arg 0x%lx", full_path, arg)); - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - GENERIC_READ | SYNCHRONIZE, 0 /* create options */, - &netfid, &oplock, NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - /* BB fixme - add this handle to a notify handle list */ - if (rc) { - cFYI(1, ("Could not open directory for notify")); - } else { - filter = convert_to_cifs_notify_flags(arg); - if (filter != 0) { - rc = CIFSSMBNotify(xid, pTcon, - 0 /* no subdirs */, netfid, - filter, file, arg & DN_MULTISHOT, - cifs_sb->local_nls); - } else { - rc = -EINVAL; - } - /* BB add code to close file eventually (at unmount - it would close automatically but may be a way - to do it easily when inode freed or when - notify info is cleared/changed */ - cFYI(1, ("notify rc %d", rc)); - } - } - - FreeXid(xid); - return rc; -} diff --git a/fs/dcache.c b/fs/dcache.c index a1d86c7f3e66..e88c23b85a32 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -34,7 +34,6 @@ #include <linux/bootmem.h> #include "internal.h" - int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); @@ -948,9 +947,6 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dentry->d_op = NULL; dentry->d_fsdata = NULL; dentry->d_mounted = 0; -#ifdef CONFIG_PROFILING - dentry->d_cookie = NULL; -#endif INIT_HLIST_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); @@ -1336,7 +1332,7 @@ err_out: * * Searches the children of the parent dentry for the name in question. If * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use d_put to free the entry when it has + * is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * * __d_lookup is dcache_lock free. The hash list is protected using RCU. @@ -1620,8 +1616,11 @@ static void switch_names(struct dentry *dentry, struct dentry *target) */ memcpy(dentry->d_iname, target->d_name.name, target->d_name.len + 1); + dentry->d_name.len = target->d_name.len; + return; } } + do_switch(dentry->d_name.len, target->d_name.len); } /* @@ -1681,7 +1680,6 @@ already_unhashed: /* Switch the names.. */ switch_names(dentry, target); - do_switch(dentry->d_name.len, target->d_name.len); do_switch(dentry->d_name.hash, target->d_name.hash); /* ... and switch the parents */ @@ -1791,7 +1789,6 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) struct dentry *dparent, *aparent; switch_names(dentry, anon); - do_switch(dentry->d_name.len, anon->d_name.len); do_switch(dentry->d_name.hash, anon->d_name.hash); dparent = dentry->d_parent; @@ -1911,7 +1908,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the + * path was too long. * * "buflen" should be positive. Caller holds the dcache_lock. * @@ -1987,7 +1985,10 @@ Elong: * Convert a dentry into an ASCII path name. If the entry has been deleted * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * Returns a pointer into the buffer or an error code if the path was + * too long. Note: Callers should use the returned pointer, not the passed + * in buffer, to use the name! The implementation often starts at an offset + * into the buffer, and may leave 0 bytes at the start. * * "buflen" should be positive. */ @@ -2313,9 +2314,6 @@ static void __init dcache_init(void) /* SLAB cache for __getname() consumers */ struct kmem_cache *names_cachep __read_mostly; -/* SLAB cache for file structures */ -struct kmem_cache *filp_cachep __read_mostly; - EXPORT_SYMBOL(d_genocide); void __init vfs_caches_init_early(void) @@ -2337,9 +2335,6 @@ void __init vfs_caches_init(unsigned long mempages) names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); - dcache_init(); inode_init(); files_init(mempages); diff --git a/fs/dcookies.c b/fs/dcookies.c index 855d4b1d619a..180e9fec4ad8 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -93,10 +93,15 @@ static struct dcookie_struct *alloc_dcookie(struct path *path) { struct dcookie_struct *dcs = kmem_cache_alloc(dcookie_cache, GFP_KERNEL); + struct dentry *d; if (!dcs) return NULL; - path->dentry->d_cookie = dcs; + d = path->dentry; + spin_lock(&d->d_lock); + d->d_flags |= DCACHE_COOKIE; + spin_unlock(&d->d_lock); + dcs->path = *path; path_get(path); hash_dcookie(dcs); @@ -119,14 +124,14 @@ int get_dcookie(struct path *path, unsigned long *cookie) goto out; } - dcs = path->dentry->d_cookie; - - if (!dcs) + if (path->dentry->d_flags & DCACHE_COOKIE) { + dcs = find_dcookie((unsigned long)path->dentry); + } else { dcs = alloc_dcookie(path); - - if (!dcs) { - err = -ENOMEM; - goto out; + if (!dcs) { + err = -ENOMEM; + goto out; + } } *cookie = dcookie_value(dcs); @@ -251,7 +256,12 @@ out_kmem: static void free_dcookie(struct dcookie_struct * dcs) { - dcs->path.dentry->d_cookie = NULL; + struct dentry *d = dcs->path.dentry; + + spin_lock(&d->d_lock); + d->d_flags &= ~DCACHE_COOKIE; + spin_unlock(&d->d_lock); + path_put(&dcs->path); kmem_cache_free(dcookie_cache, dcs); } diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 5d61b7c06e13..fff96e152c0c 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -27,25 +27,32 @@ #define DEVPTS_SUPER_MAGIC 0x1cd1 #define DEVPTS_DEFAULT_MODE 0600 +/* + * ptmx is a new node in /dev/pts and will be unused in legacy (single- + * instance) mode. To prevent surprises in user space, set permissions of + * ptmx to 0. Use 'chmod' or remount with '-o ptmxmode' to set meaningful + * permissions. + */ +#define DEVPTS_DEFAULT_PTMX_MODE 0000 #define PTMX_MINOR 2 extern int pty_limit; /* Config limit on Unix98 ptys */ -static DEFINE_IDA(allocated_ptys); static DEFINE_MUTEX(allocated_ptys_lock); static struct vfsmount *devpts_mnt; -static struct dentry *devpts_root; -static struct { +struct pts_mount_opts { int setuid; int setgid; uid_t uid; gid_t gid; umode_t mode; -} config = {.mode = DEVPTS_DEFAULT_MODE}; + umode_t ptmxmode; + int newinstance; +}; enum { - Opt_uid, Opt_gid, Opt_mode, + Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance, Opt_err }; @@ -53,18 +60,50 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + {Opt_ptmxmode, "ptmxmode=%o"}, + {Opt_newinstance, "newinstance"}, +#endif {Opt_err, NULL} }; -static int devpts_remount(struct super_block *sb, int *flags, char *data) +struct pts_fs_info { + struct ida allocated_ptys; + struct pts_mount_opts mount_opts; + struct dentry *ptmx_dentry; +}; + +static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct super_block *pts_sb_from_inode(struct inode *inode) +{ +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) + return inode->i_sb; +#endif + return devpts_mnt->mnt_sb; +} + +#define PARSE_MOUNT 0 +#define PARSE_REMOUNT 1 + +static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) { char *p; - config.setuid = 0; - config.setgid = 0; - config.uid = 0; - config.gid = 0; - config.mode = DEVPTS_DEFAULT_MODE; + opts->setuid = 0; + opts->setgid = 0; + opts->uid = 0; + opts->gid = 0; + opts->mode = DEVPTS_DEFAULT_MODE; + opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + + /* newinstance makes sense only on initial mount */ + if (op == PARSE_MOUNT) + opts->newinstance = 0; while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -79,20 +118,32 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) case Opt_uid: if (match_int(&args[0], &option)) return -EINVAL; - config.uid = option; - config.setuid = 1; + opts->uid = option; + opts->setuid = 1; break; case Opt_gid: if (match_int(&args[0], &option)) return -EINVAL; - config.gid = option; - config.setgid = 1; + opts->gid = option; + opts->setgid = 1; break; case Opt_mode: if (match_octal(&args[0], &option)) return -EINVAL; - config.mode = option & S_IALLUGO; + opts->mode = option & S_IALLUGO; + break; +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + case Opt_ptmxmode: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->ptmxmode = option & S_IALLUGO; + break; + case Opt_newinstance: + /* newinstance makes sense only on initial mount */ + if (op == PARSE_MOUNT) + opts->newinstance = 1; break; +#endif default: printk(KERN_ERR "devpts: called with bogus options\n"); return -EINVAL; @@ -102,13 +153,108 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) return 0; } +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES +static int mknod_ptmx(struct super_block *sb) +{ + int mode; + int rc = -ENOMEM; + struct dentry *dentry; + struct inode *inode; + struct dentry *root = sb->s_root; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + mutex_lock(&root->d_inode->i_mutex); + + /* If we have already created ptmx node, return */ + if (fsi->ptmx_dentry) { + rc = 0; + goto out; + } + + dentry = d_alloc_name(root, "ptmx"); + if (!dentry) { + printk(KERN_NOTICE "Unable to alloc dentry for ptmx node\n"); + goto out; + } + + /* + * Create a new 'ptmx' node in this mount of devpts. + */ + inode = new_inode(sb); + if (!inode) { + printk(KERN_ERR "Unable to alloc inode for ptmx node\n"); + dput(dentry); + goto out; + } + + inode->i_ino = 2; + inode->i_uid = inode->i_gid = 0; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + mode = S_IFCHR|opts->ptmxmode; + init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); + + d_add(dentry, inode); + + fsi->ptmx_dentry = dentry; + rc = 0; + + printk(KERN_DEBUG "Created ptmx node in devpts ino %lu\n", + inode->i_ino); +out: + mutex_unlock(&root->d_inode->i_mutex); + return rc; +} + +static void update_ptmx_mode(struct pts_fs_info *fsi) +{ + struct inode *inode; + if (fsi->ptmx_dentry) { + inode = fsi->ptmx_dentry->d_inode; + inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; + } +} +#else +static inline void update_ptmx_mode(struct pts_fs_info *fsi) +{ + return; +} +#endif + +static int devpts_remount(struct super_block *sb, int *flags, char *data) +{ + int err; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + err = parse_mount_options(data, PARSE_REMOUNT, opts); + + /* + * parse_mount_options() restores options to default values + * before parsing and may have changed ptmxmode. So, update the + * mode in the inode too. Bogus options don't fail the remount, + * so do this even on error return. + */ + update_ptmx_mode(fsi); + + return err; +} + static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) { - if (config.setuid) - seq_printf(seq, ",uid=%u", config.uid); - if (config.setgid) - seq_printf(seq, ",gid=%u", config.gid); - seq_printf(seq, ",mode=%03o", config.mode); + struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb); + struct pts_mount_opts *opts = &fsi->mount_opts; + + if (opts->setuid) + seq_printf(seq, ",uid=%u", opts->uid); + if (opts->setgid) + seq_printf(seq, ",gid=%u", opts->gid); + seq_printf(seq, ",mode=%03o", opts->mode); +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES + seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); +#endif return 0; } @@ -119,10 +265,25 @@ static const struct super_operations devpts_sops = { .show_options = devpts_show_options, }; +static void *new_pts_fs_info(void) +{ + struct pts_fs_info *fsi; + + fsi = kzalloc(sizeof(struct pts_fs_info), GFP_KERNEL); + if (!fsi) + return NULL; + + ida_init(&fsi->allocated_ptys); + fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE; + fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; + + return fsi; +} + static int devpts_fill_super(struct super_block *s, void *data, int silent) { - struct inode * inode; + struct inode *inode; s->s_blocksize = 1024; s->s_blocksize_bits = 10; @@ -130,9 +291,13 @@ devpts_fill_super(struct super_block *s, void *data, int silent) s->s_op = &devpts_sops; s->s_time_gran = 1; + s->s_fs_info = new_pts_fs_info(); + if (!s->s_fs_info) + goto fail; + inode = new_inode(s); if (!inode) - goto fail; + goto free_fsi; inode->i_ino = 1; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_blocks = 0; @@ -142,27 +307,226 @@ devpts_fill_super(struct super_block *s, void *data, int silent) inode->i_fop = &simple_dir_operations; inode->i_nlink = 2; - devpts_root = s->s_root = d_alloc_root(inode); + s->s_root = d_alloc_root(inode); if (s->s_root) return 0; - - printk("devpts: get root dentry failed\n"); + + printk(KERN_ERR "devpts: get root dentry failed\n"); iput(inode); + +free_fsi: + kfree(s->s_fs_info); fail: return -ENOMEM; } +#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES +static int compare_init_pts_sb(struct super_block *s, void *p) +{ + if (devpts_mnt) + return devpts_mnt->mnt_sb == s; + return 0; +} + +/* + * Safely parse the mount options in @data and update @opts. + * + * devpts ends up parsing options two times during mount, due to the + * two modes of operation it supports. The first parse occurs in + * devpts_get_sb() when determining the mode (single-instance or + * multi-instance mode). The second parse happens in devpts_remount() + * or new_pts_mount() depending on the mode. + * + * Parsing of options modifies the @data making subsequent parsing + * incorrect. So make a local copy of @data and parse it. + * + * Return: 0 On success, -errno on error + */ +static int safe_parse_mount_options(void *data, struct pts_mount_opts *opts) +{ + int rc; + void *datacp; + + if (!data) + return 0; + + /* Use kstrdup() ? */ + datacp = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!datacp) + return -ENOMEM; + + memcpy(datacp, data, PAGE_SIZE); + rc = parse_mount_options((char *)datacp, PARSE_MOUNT, opts); + kfree(datacp); + + return rc; +} + +/* + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. + */ +static int new_pts_mount(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + int err; + struct pts_fs_info *fsi; + struct pts_mount_opts *opts; + + printk(KERN_NOTICE "devpts: newinstance mount\n"); + + err = get_sb_nodev(fs_type, flags, data, devpts_fill_super, mnt); + if (err) + return err; + + fsi = DEVPTS_SB(mnt->mnt_sb); + opts = &fsi->mount_opts; + + err = parse_mount_options(data, PARSE_MOUNT, opts); + if (err) + goto fail; + + err = mknod_ptmx(mnt->mnt_sb); + if (err) + goto fail; + + return 0; + +fail: + dput(mnt->mnt_sb->s_root); + deactivate_super(mnt->mnt_sb); + return err; +} + +/* + * Check if 'newinstance' mount option was specified in @data. + * + * Return: -errno on error (eg: invalid mount options specified) + * : 1 if 'newinstance' mount option was specified + * : 0 if 'newinstance' mount option was NOT specified + */ +static int is_new_instance_mount(void *data) +{ + int rc; + struct pts_mount_opts opts; + + if (!data) + return 0; + + rc = safe_parse_mount_options(data, &opts); + if (!rc) + rc = opts.newinstance; + + return rc; +} + +/* + * get_init_pts_sb() + * + * This interface is needed to support multiple namespace semantics in + * devpts while preserving backward compatibility of the current 'single- + * namespace' semantics. i.e all mounts of devpts without the 'newinstance' + * mount option should bind to the initial kernel mount, like + * get_sb_single(). + * + * Mounts with 'newinstance' option create a new private namespace. + * + * But for single-mount semantics, devpts cannot use get_sb_single(), + * because get_sb_single()/sget() find and use the super-block from + * the most recent mount of devpts. But that recent mount may be a + * 'newinstance' mount and get_sb_single() would pick the newinstance + * super-block instead of the initial super-block. + * + * This interface is identical to get_sb_single() except that it + * consistently selects the 'single-namespace' superblock even in the + * presence of the private namespace (i.e 'newinstance') super-blocks. + */ +static int get_init_pts_sb(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + struct super_block *s; + int error; + + s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); + if (IS_ERR(s)) + return PTR_ERR(s); + + if (!s->s_root) { + s->s_flags = flags; + error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + } + do_remount_sb(s, flags, data, 0); + return simple_set_mnt(mnt, s); +} + +/* + * Mount or remount the initial kernel mount of devpts. This type of + * mount maintains the legacy, single-instance semantics, while the + * kernel still allows multiple-instances. + */ +static int init_pts_mount(struct file_system_type *fs_type, int flags, + void *data, struct vfsmount *mnt) +{ + int err; + + err = get_init_pts_sb(fs_type, flags, data, mnt); + if (err) + return err; + + err = mknod_ptmx(mnt->mnt_sb); + if (err) { + dput(mnt->mnt_sb->s_root); + deactivate_super(mnt->mnt_sb); + } + + return err; +} + static int devpts_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { + int new; + + new = is_new_instance_mount(data); + if (new < 0) + return new; + + if (new) + return new_pts_mount(fs_type, flags, data, mnt); + + return init_pts_mount(fs_type, flags, data, mnt); +} +#else +/* + * This supports only the legacy single-instance semantics (no + * multiple-instance semantics) + */ +static int devpts_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); } +#endif + +static void devpts_kill_sb(struct super_block *sb) +{ + struct pts_fs_info *fsi = DEVPTS_SB(sb); + + kfree(fsi); + kill_litter_super(sb); +} static struct file_system_type devpts_fs_type = { .owner = THIS_MODULE, .name = "devpts", .get_sb = devpts_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = devpts_kill_sb, }; /* @@ -172,16 +536,17 @@ static struct file_system_type devpts_fs_type = { int devpts_new_index(struct inode *ptmx_inode) { + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct pts_fs_info *fsi = DEVPTS_SB(sb); int index; int ida_ret; retry: - if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) { + if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; - } mutex_lock(&allocated_ptys_lock); - ida_ret = ida_get_new(&allocated_ptys, &index); + ida_ret = ida_get_new(&fsi->allocated_ptys, &index); if (ida_ret < 0) { mutex_unlock(&allocated_ptys_lock); if (ida_ret == -EAGAIN) @@ -190,7 +555,7 @@ retry: } if (index >= pty_limit) { - ida_remove(&allocated_ptys, index); + ida_remove(&fsi->allocated_ptys, index); mutex_unlock(&allocated_ptys_lock); return -EIO; } @@ -200,18 +565,26 @@ retry: void devpts_kill_index(struct inode *ptmx_inode, int idx) { + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct pts_fs_info *fsi = DEVPTS_SB(sb); + mutex_lock(&allocated_ptys_lock); - ida_remove(&allocated_ptys, idx); + ida_remove(&fsi->allocated_ptys, idx); mutex_unlock(&allocated_ptys_lock); } int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) { - int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ + /* tty layer puts index from devpts_new_index() in here */ + int number = tty->index; struct tty_driver *driver = tty->driver; dev_t device = MKDEV(driver->major, driver->minor_start+number); struct dentry *dentry; - struct inode *inode = new_inode(devpts_mnt->mnt_sb); + struct super_block *sb = pts_sb_from_inode(ptmx_inode); + struct inode *inode = new_inode(sb); + struct dentry *root = sb->s_root; + struct pts_fs_info *fsi = DEVPTS_SB(sb); + struct pts_mount_opts *opts = &fsi->mount_opts; char s[12]; /* We're supposed to be given the slave end of a pty */ @@ -221,25 +594,25 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) if (!inode) return -ENOMEM; - inode->i_ino = number+2; - inode->i_uid = config.setuid ? config.uid : current_fsuid(); - inode->i_gid = config.setgid ? config.gid : current_fsgid(); + inode->i_ino = number + 3; + inode->i_uid = opts->setuid ? opts->uid : current_fsuid(); + inode->i_gid = opts->setgid ? opts->gid : current_fsgid(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - init_special_inode(inode, S_IFCHR|config.mode, device); + init_special_inode(inode, S_IFCHR|opts->mode, device); inode->i_private = tty; tty->driver_data = inode; sprintf(s, "%d", number); - mutex_lock(&devpts_root->d_inode->i_mutex); + mutex_lock(&root->d_inode->i_mutex); - dentry = d_alloc_name(devpts_root, s); + dentry = d_alloc_name(root, s); if (!IS_ERR(dentry)) { d_add(dentry, inode); - fsnotify_create(devpts_root->d_inode, dentry); + fsnotify_create(root->d_inode, dentry); } - mutex_unlock(&devpts_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); return 0; } @@ -256,20 +629,27 @@ struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) void devpts_pty_kill(struct tty_struct *tty) { struct inode *inode = tty->driver_data; + struct super_block *sb = pts_sb_from_inode(inode); + struct dentry *root = sb->s_root; struct dentry *dentry; BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); - mutex_lock(&devpts_root->d_inode->i_mutex); + mutex_lock(&root->d_inode->i_mutex); dentry = d_find_alias(inode); - if (dentry && !IS_ERR(dentry)) { + if (IS_ERR(dentry)) + goto out; + + if (dentry) { inode->i_nlink--; d_delete(dentry); - dput(dentry); + dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } - mutex_unlock(&devpts_root->d_inode->i_mutex); + dput(dentry); /* d_find_alias above */ +out: + mutex_unlock(&root->d_inode->i_mutex); } static int __init init_devpts_fs(void) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 89209f00f9c7..5e78fc179886 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -673,10 +673,11 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) ecryptfs_printk(KERN_DEBUG, "Calling readlink w/ " "dentry->d_name.name = [%s]\n", dentry->d_name.name); rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); - buf[rc] = '\0'; set_fs(old_fs); if (rc < 0) goto out_free; + else + buf[rc] = '\0'; rc = 0; nd_set_link(nd, buf); goto out; diff --git a/fs/exec.c b/fs/exec.c index 02d2e120542d..3ef9cf9b1871 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -57,11 +57,6 @@ #include <asm/tlb.h> #include "internal.h" -#ifdef __alpha__ -/* for /sbin/loader handling in search_binary_handler() */ -#include <linux/a.out.h> -#endif - int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -127,7 +122,8 @@ asmlinkage long sys_uselib(const char __user * library) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); + error = inode_permission(nd.path.dentry->d_inode, + MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -680,7 +676,7 @@ struct file *open_exec(const char *name) if (nd.path.mnt->mnt_flags & MNT_NOEXEC) goto out_path_put; - err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); + err = inode_permission(nd.path.dentry->d_inode, MAY_EXEC | MAY_OPEN); if (err) goto out_path_put; @@ -1171,41 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; -#ifdef __alpha__ - /* handle /sbin/loader.. */ - { - struct exec * eh = (struct exec *) bprm->buf; - if (!bprm->loader && eh->fh.f_magic == 0x183 && - (eh->fh.f_flags & 0x3000) == 0x3000) - { - struct file * file; - unsigned long loader; - - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; - - loader = bprm->vma->vm_end - sizeof(void *); - - file = open_exec("/sbin/loader"); - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - /* Remember if the application is TASO. */ - bprm->taso = eh->ah.entry < 0x100000000UL; - - bprm->file = file; - bprm->loader = loader; - retval = prepare_binprm(bprm); - if (retval<0) - return retval; - /* should call search_binary_handler recursively here, - but it does not matter */ - } - } -#endif retval = security_bprm_check(bprm); if (retval) return retval; diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 8d0add625870..c454d5db28a5 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -585,7 +585,10 @@ got: spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; @@ -612,6 +615,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); return ERR_PTR(err); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 7658b33e2653..02b39a5deb74 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -32,6 +32,7 @@ #include <linux/buffer_head.h> #include <linux/mpage.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "ext2.h" #include "acl.h" #include "xip.h" @@ -1286,9 +1287,11 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) else inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { - if (ext2_inode_is_fast_symlink(inode)) + if (ext2_inode_is_fast_symlink(inode)) { inode->i_op = &ext2_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext2_symlink_inode_operations; if (test_opt(inode->i_sb, NOBH)) inode->i_mapping->a_ops = &ext2_nobh_aops; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 2a747252ec12..90ea17998a73 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,9 +41,11 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) int err = ext2_add_link(dentry, inode); if (!err) { d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -170,6 +172,7 @@ out: out_fail: inode_dec_link_count(inode); + unlock_new_inode(inode); iput (inode); goto out; } @@ -178,6 +181,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; + int err; if (inode->i_nlink >= EXT2_LINK_MAX) return -EMLINK; @@ -186,7 +190,14 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, inode_inc_link_count(inode); atomic_inc(&inode->i_count); - return ext2_add_nondir(dentry, inode); + err = ext2_add_link(dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; } static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) @@ -222,12 +233,14 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) goto out_fail; d_instantiate(dentry, inode); + unlock_new_inode(inode); out: return err; out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); out_dir: inode_dec_link_count(dir); diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 490bd0ed7896..5655fbcbd11f 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -579,7 +579,10 @@ got: ext3_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -627,6 +630,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index f8424ad89971..c4bdccf976b5 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -37,6 +37,7 @@ #include <linux/uio.h> #include <linux/bio.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include "xattr.h" #include "acl.h" @@ -2817,9 +2818,11 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext3_dir_inode_operations; inode->i_fop = &ext3_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext3_inode_is_fast_symlink(inode)) + if (ext3_inode_is_fast_symlink(inode)) { inode->i_op = &ext3_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext3_symlink_inode_operations; ext3_set_aops(inode); } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 3e5edc92aa0b..297ea8dfac7c 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1652,9 +1652,11 @@ static int ext3_add_nondir(handle_t *handle, if (!err) { ext3_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1765,6 +1767,7 @@ retry: dir_block = ext3_bread (handle, inode, 0, 1, &err); if (!dir_block) { drop_nlink(inode); /* is this nlink == 0? */ + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1792,6 +1795,7 @@ retry: err = ext3_add_entry (handle, dentry, inode); if (err) { inode->i_nlink = 0; + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1800,6 +1804,7 @@ retry: ext3_update_dx_flag(dir); ext3_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) @@ -2174,6 +2179,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { drop_nlink(inode); + unlock_new_inode(inode); ext3_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2221,7 +2227,14 @@ retry: inc_nlink(inode); atomic_inc(&inode->i_count); - err = ext3_add_nondir(handle, dentry, inode); + err = ext3_add_entry(handle, dentry, inode); + if (!err) { + ext3_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 08cac9fcace2..6e6052879aa2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -826,7 +826,10 @@ got: ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + err = -EINVAL; + goto fail_drop; + } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); @@ -881,6 +884,7 @@ fail_drop: DQUOT_DROP(inode); inode->i_flags |= S_NOQUOTA; inode->i_nlink = 0; + unlock_new_inode(inode); iput(inode); brelse(bitmap_bh); return ERR_PTR(err); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index be21a5ae33cb..7c3325e0b005 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -34,6 +34,7 @@ #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/mpage.h> +#include <linux/namei.h> #include <linux/uio.h> #include <linux/bio.h> #include "ext4_jbd2.h" @@ -4164,9 +4165,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode)) + if (ext4_inode_is_fast_symlink(inode)) { inode->i_op = &ext4_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 63adcb792988..da98a9012fa5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1693,9 +1693,11 @@ static int ext4_add_nondir(handle_t *handle, if (!err) { ext4_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); + unlock_new_inode(inode); return 0; } drop_nlink(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -1830,6 +1832,7 @@ retry: if (err) { out_clear_inode: clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -1838,6 +1841,7 @@ out_clear_inode: ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); out_stop: ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) @@ -2212,6 +2216,7 @@ retry: mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { clear_nlink(inode); + unlock_new_inode(inode); ext4_mark_inode_dirty(handle, inode); iput(inode); goto out_stop; @@ -2262,7 +2267,14 @@ retry: ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext4_add_nondir(handle, dentry, inode); + err = ext4_add_entry(handle, dentry, inode); + if (!err) { + ext4_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + } else { + drop_nlink(inode); + iput(inode); + } ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 67e058357098..3a7f603b6982 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -841,7 +841,6 @@ const struct file_operations fat_dir_operations = { .compat_ioctl = fat_compat_dir_ioctl, #endif .fsync = file_fsync, - .llseek = generic_file_llseek, }; static int fat_get_short_entry(struct inode *dir, loff_t *pos, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d937aaf77374..6b74d09adbe5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -749,6 +749,8 @@ static struct dentry *fat_get_parent(struct dentry *child) brelse(bh); parent = d_obtain_alias(inode); + if (!IS_ERR(parent)) + parent->d_op = sb->s_root->d_op; out: unlock_super(sb); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index bf326d4356a3..8ae32e37673c 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -78,7 +78,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * for creation. */ if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { - if (nd->flags & LOOKUP_CREATE) + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; } diff --git a/fs/file_table.c b/fs/file_table.c index 0fbcacc3ea75..bbeeac6efa1a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -32,6 +32,9 @@ struct files_stat_struct files_stat = { /* public. Not pretty! */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +/* SLAB cache for file structures */ +static struct kmem_cache *filp_cachep __read_mostly; + static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) @@ -397,7 +400,12 @@ too_bad: void __init files_init(unsigned long mempages) { int n; - /* One file with associated inode and dcache is very roughly 1K. + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + + /* + * One file with associated inode and dcache is very roughly 1K. * Per default don't use more than 10% of our memory for files. */ diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 9f3f2ceb73f0..03a6ea5e99f7 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -325,8 +325,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino) if (!VXFS_ISIMMED(vip)) { ip->i_op = &page_symlink_inode_operations; ip->i_mapping->a_ops = &vxfs_aops; - } else + } else { ip->i_op = &vxfs_immed_symlink_iops; + vip->vii_immed.vi_immed[ip->i_size] = '\0'; + } } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); diff --git a/fs/inode.c b/fs/inode.c index 098a2443196f..7de1cda92489 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1032,6 +1032,65 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) EXPORT_SYMBOL(iget_locked); +int insert_inode_locked(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + ino_t ino = inode->i_ino; + struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + while (1) { + spin_lock(&inode_lock); + old = find_inode_fast(sb, head, ino); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked); + +int insert_inode_locked4(struct inode *inode, unsigned long hashval, + int (*test)(struct inode *, void *), void *data) +{ + struct super_block *sb = inode->i_sb; + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *old; + + inode->i_state |= I_LOCK|I_NEW; + + while (1) { + spin_lock(&inode_lock); + old = find_inode(sb, head, test, data); + if (likely(!old)) { + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); + return 0; + } + __iget(old); + spin_unlock(&inode_lock); + wait_on_inode(old); + if (unlikely(!hlist_unhashed(&old->i_hash))) { + iput(old); + return -EBUSY; + } + iput(old); + } +} + +EXPORT_SYMBOL(insert_inode_locked4); + /** * __insert_inode_hash - hash an inode * @inode: unhashed inode diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 70022fd1c539..d4d142c2edd4 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -79,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - return ERR_PTR(-ENOMEM); + rc = -ENOMEM; + goto fail; } jfs_inode = JFS_IP(inode); @@ -89,8 +90,12 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_warn("ialloc: diAlloc returned %d!", rc); if (rc == -EIO) make_bad_inode(inode); - iput(inode); - return ERR_PTR(rc); + goto fail_put; + } + + if (insert_inode_locked(inode) < 0) { + rc = -EINVAL; + goto fail_unlock; } inode->i_uid = current_fsuid(); @@ -112,11 +117,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) * Allocate inode to quota. */ if (DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - return ERR_PTR(-EDQUOT); + rc = -EDQUOT; + goto fail_drop; } inode->i_mode = mode; @@ -158,4 +160,15 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_info("ialloc returns inode = 0x%p\n", inode); return inode; + +fail_drop: + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; +fail_unlock: + inode->i_nlink = 0; + unlock_new_inode(inode); +fail_put: + iput(inode); +fail: + return ERR_PTR(rc); } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index cc3cedffbfa1..b4de56b851e4 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -155,7 +155,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, ip->i_fop = &jfs_file_operations; ip->i_mapping->a_ops = &jfs_aops; - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -171,9 +170,12 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -289,7 +291,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) ip->i_op = &jfs_dir_inode_operations; ip->i_fop = &jfs_dir_operations; - insert_inode_hash(ip); mark_inode_dirty(ip); /* update parent directory inode */ @@ -306,9 +307,12 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1019,7 +1023,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, goto out3; } - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -1039,9 +1042,12 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out2: free_UCSname(&dname); @@ -1399,7 +1405,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_ip->dev = new_encode_dev(rdev); init_special_inode(ip, ip->i_mode, rdev); - insert_inode_hash(ip); mark_inode_dirty(ip); dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1417,9 +1422,12 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, if (rc) { free_ea_wmap(ip); ip->i_nlink = 0; + unlock_new_inode(ip); iput(ip); - } else + } else { d_instantiate(dentry, ip); + unlock_new_inode(ip); + } out1: free_UCSname(&dname); diff --git a/fs/namei.c b/fs/namei.c index af3783fff1de..dd5c9f0bf829 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -226,6 +226,16 @@ int generic_permission(struct inode *inode, int mask, return -EACCES; } +/** + * inode_permission - check for access rights to a given inode + * @inode: inode to check permission on + * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Used to check for read/write/execute permissions on an inode. + * We use "fsuid" for this, letting us set arbitrary permissions + * for filesystem access without changing the "normal" uids which + * are used for other things. + */ int inode_permission(struct inode *inode, int mask) { int retval; @@ -247,7 +257,6 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - /* Ordinary permission routines do not understand MAY_APPEND. */ if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); else @@ -265,21 +274,6 @@ int inode_permission(struct inode *inode, int mask) } /** - * vfs_permission - check for access rights to a given path - * @nd: lookup result that describes the path - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) - * - * Used to check for read/write/execute permissions on a path. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. - */ -int vfs_permission(struct nameidata *nd, int mask) -{ - return inode_permission(nd->path.dentry->d_inode, mask); -} - -/** * file_permission - check for additional access rights to a given file * @file: file to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) @@ -289,7 +283,7 @@ int vfs_permission(struct nameidata *nd, int mask) * * Note: * Do not use this function in new code. All access checks should - * be done using vfs_permission(). + * be done using inode_permission(). */ int file_permission(struct file *file, int mask) { @@ -527,18 +521,6 @@ out_unlock: return result; } -/* SMP-safe */ -static __always_inline void -walk_init_root(const char *name, struct nameidata *nd) -{ - struct fs_struct *fs = current->fs; - - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); -} - /* * Wrapper to retry pathname resolution whenever the underlying * file system returns an ESTALE. @@ -576,9 +558,16 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l goto fail; if (*link == '/') { + struct fs_struct *fs = current->fs; + path_put(&nd->path); - walk_init_root(link, nd); + + read_lock(&fs->lock); + nd->path = fs->root; + path_get(&fs->root); + read_unlock(&fs->lock); } + res = link_path_walk(link, nd); if (nd->depth || res || nd->last_type!=LAST_NORM) return res; @@ -859,7 +848,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode); if (err == -EAGAIN) - err = vfs_permission(nd, MAY_EXEC); + err = inode_permission(nd->path.dentry->d_inode, + MAY_EXEC); if (err) break; @@ -1493,9 +1483,9 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; } -int may_open(struct nameidata *nd, int acc_mode, int flag) +int may_open(struct path *path, int acc_mode, int flag) { - struct dentry *dentry = nd->path.dentry; + struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; @@ -1516,13 +1506,13 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { flag &= ~O_TRUNC; } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { - if (nd->path.mnt->mnt_flags & MNT_NODEV) + if (path->mnt->mnt_flags & MNT_NODEV) return -EACCES; flag &= ~O_TRUNC; } - error = vfs_permission(nd, acc_mode); + error = inode_permission(inode, acc_mode); if (error) return error; /* @@ -1556,6 +1546,9 @@ int may_open(struct nameidata *nd, int acc_mode, int flag) * Refuse to truncate files with mandatory locks held on them. */ error = locks_verify_locked(inode); + if (!error) + error = security_path_truncate(path, 0, + ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); if (!error) { DQUOT_INIT(inode); @@ -1586,14 +1579,18 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, if (!IS_POSIXACL(dir->d_inode)) mode &= ~current->fs->umask; + error = security_path_mknod(&nd->path, path->dentry, mode, 0); + if (error) + goto out_unlock; error = vfs_create(dir->d_inode, path->dentry, mode, nd); +out_unlock: mutex_unlock(&dir->d_inode->i_mutex); dput(nd->path.dentry); nd->path.dentry = path->dentry; if (error) return error; /* Don't check for write permission, don't truncate */ - return may_open(nd, 0, flag & ~O_TRUNC); + return may_open(&nd->path, 0, flag & ~O_TRUNC); } /* @@ -1779,7 +1776,7 @@ ok: if (error) goto exit; } - error = may_open(&nd, acc_mode, flag); + error = may_open(&nd.path, acc_mode, flag); if (error) { if (will_write) mnt_drop_write(nd.path.mnt); @@ -1999,6 +1996,9 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mknod(&nd.path, dentry, mode, dev); + if (error) + goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); @@ -2011,6 +2011,7 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); break; } +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2070,7 +2071,11 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_mkdir(&nd.path, dentry, mode); + if (error) + goto out_drop_write; error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2180,7 +2185,11 @@ static long do_rmdir(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit3; + error = security_path_rmdir(&nd.path, dentry); + if (error) + goto exit4; error = vfs_rmdir(nd.path.dentry->d_inode, dentry); +exit4: mnt_drop_write(nd.path.mnt); exit3: dput(dentry); @@ -2265,7 +2274,11 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = mnt_want_write(nd.path.mnt); if (error) goto exit2; + error = security_path_unlink(&nd.path, dentry); + if (error) + goto exit3; error = vfs_unlink(nd.path.dentry->d_inode, dentry); +exit3: mnt_drop_write(nd.path.mnt); exit2: dput(dentry); @@ -2346,7 +2359,11 @@ asmlinkage long sys_symlinkat(const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_symlink(&nd.path, dentry, from); + if (error) + goto out_drop_write; error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); @@ -2443,7 +2460,11 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; + error = security_path_link(old_path.dentry, &nd.path, new_dentry); + if (error) + goto out_drop_write; error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); +out_drop_write: mnt_drop_write(nd.path.mnt); out_dput: dput(new_dentry); @@ -2679,8 +2700,13 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, error = mnt_want_write(oldnd.path.mnt); if (error) goto exit5; + error = security_path_rename(&oldnd.path, old_dentry, + &newnd.path, new_dentry); + if (error) + goto exit6; error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); +exit6: mnt_drop_write(oldnd.path.mnt); exit5: dput(new_dentry); @@ -2750,13 +2776,16 @@ int vfs_follow_link(struct nameidata *nd, const char *link) /* get the link contents into pagecache */ static char *page_getlink(struct dentry * dentry, struct page **ppage) { - struct page * page; + char *kaddr; + struct page *page; struct address_space *mapping = dentry->d_inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; *ppage = page; - return kmap(page); + kaddr = kmap(page); + nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1); + return kaddr; } int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) @@ -2849,7 +2878,6 @@ EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); -EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); EXPORT_SYMBOL(vfs_create); @@ -2865,3 +2893,10 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; diff --git a/fs/namespace.c b/fs/namespace.c index 1c09cab8f7cf..a40685d800a8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1990,7 +1990,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, if (!new_ns->root) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM);; + return ERR_PTR(-ENOMEM); } spin_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); diff --git a/fs/nfsctl.c b/fs/nfsctl.c index b1acbd6ab6fb..b27451909dff 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -38,9 +38,10 @@ static struct file *do_open(char *name, int flags) return ERR_PTR(error); if (flags == O_RDWR) - error = may_open(&nd,MAY_READ|MAY_WRITE,FMODE_READ|FMODE_WRITE); + error = may_open(&nd.path, MAY_READ|MAY_WRITE, + FMODE_READ|FMODE_WRITE); else - error = may_open(&nd, MAY_WRITE, FMODE_WRITE); + error = may_open(&nd.path, MAY_WRITE, FMODE_WRITE); if (!error) return dentry_open(nd.path.dentry, nd.path.mnt, flags, diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig new file mode 100644 index 000000000000..50914d7303c6 --- /dev/null +++ b/fs/notify/Kconfig @@ -0,0 +1,2 @@ +source "fs/notify/dnotify/Kconfig" +source "fs/notify/inotify/Kconfig" diff --git a/fs/notify/Makefile b/fs/notify/Makefile new file mode 100644 index 000000000000..5a95b6010ce7 --- /dev/null +++ b/fs/notify/Makefile @@ -0,0 +1,2 @@ +obj-y += dnotify/ +obj-y += inotify/ diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig new file mode 100644 index 000000000000..26adf5dfa646 --- /dev/null +++ b/fs/notify/dnotify/Kconfig @@ -0,0 +1,10 @@ +config DNOTIFY + bool "Dnotify support" + default y + help + Dnotify is a directory-based per-fd file change notification system + that uses signals to communicate events to user-space. There exist + superior alternatives, but some applications may still rely on + dnotify. + + If unsure, say Y. diff --git a/fs/notify/dnotify/Makefile b/fs/notify/dnotify/Makefile new file mode 100644 index 000000000000..f145251dcadb --- /dev/null +++ b/fs/notify/dnotify/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DNOTIFY) += dnotify.o diff --git a/fs/dnotify.c b/fs/notify/dnotify/dnotify.c index 676073b8dda5..b0aa2cde80bd 100644 --- a/fs/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -115,9 +115,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) dn->dn_next = inode->i_dnotify; inode->i_dnotify = dn; spin_unlock(&inode->i_lock); - - if (filp->f_op && filp->f_op->dir_notify) - return filp->f_op->dir_notify(filp, arg); return 0; out_free: diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig new file mode 100644 index 000000000000..446792841023 --- /dev/null +++ b/fs/notify/inotify/Kconfig @@ -0,0 +1,27 @@ +config INOTIFY + bool "Inotify file change notification support" + default y + ---help--- + Say Y here to enable inotify support. Inotify is a file change + notification system and a replacement for dnotify. Inotify fixes + numerous shortcomings in dnotify and introduces several new features + including multiple file events, one-shot support, and unmount + notification. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. + +config INOTIFY_USER + bool "Inotify support for userspace" + depends on INOTIFY + default y + ---help--- + Say Y here to enable inotify support for userspace, including the + associated system calls. Inotify allows monitoring of both files and + directories via a single open fd. Events are read from the file + descriptor, which is also select()- and poll()-able. + + For more information, see <file:Documentation/filesystems/inotify.txt> + + If unsure, say Y. diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile new file mode 100644 index 000000000000..e290f3bb9d8d --- /dev/null +++ b/fs/notify/inotify/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_INOTIFY) += inotify.o +obj-$(CONFIG_INOTIFY_USER) += inotify_user.o diff --git a/fs/inotify.c b/fs/notify/inotify/inotify.c index dae3f28f30d4..dae3f28f30d4 100644 --- a/fs/inotify.c +++ b/fs/notify/inotify/inotify.c diff --git a/fs/inotify_user.c b/fs/notify/inotify/inotify_user.c index e2425bbd871f..400f8064a548 100644 --- a/fs/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -76,10 +76,10 @@ struct inotify_device { struct mutex ev_mutex; /* protects event queue */ struct mutex up_mutex; /* synchronizes watch updates */ struct list_head events; /* list of queued events */ - atomic_t count; /* reference count */ struct user_struct *user; /* user who opened this dev */ struct inotify_handle *ih; /* inotify handle */ struct fasync_struct *fa; /* async notification */ + atomic_t count; /* reference count */ unsigned int queue_size; /* size of the queue (bytes) */ unsigned int event_count; /* number of pending events */ unsigned int max_events; /* maximum number of events */ diff --git a/fs/open.c b/fs/open.c index c0a426d5766c..1cd7d40e9991 100644 --- a/fs/open.c +++ b/fs/open.c @@ -272,6 +272,8 @@ static long do_sys_truncate(const char __user *pathname, loff_t length) goto put_write_and_out; error = locks_verify_truncate(inode, NULL, length); + if (!error) + error = security_path_truncate(&path, length, 0); if (!error) { DQUOT_INIT(inode); error = do_truncate(path.dentry, length, 0, NULL); @@ -329,6 +331,9 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) error = locks_verify_truncate(inode, file, length); if (!error) + error = security_path_truncate(&file->f_path, length, + ATTR_MTIME|ATTR_CTIME); + if (!error) error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); out_putf: fput(file); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 3bb1cf1e7425..f75efa22df5e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,6 +9,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/irqnr.h> #include <asm/cputime.h> #ifndef arch_irq_stat_cpu @@ -45,10 +46,6 @@ static int show_stat(struct seq_file *p, void *v) steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); for_each_irq_nr(j) { -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) - continue; -#endif sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); @@ -95,12 +92,6 @@ static int show_stat(struct seq_file *p, void *v) /* sum again ? it could be updated? */ for_each_irq_nr(j) { per_irq_sum = 0; -#ifdef CONFIG_SPARSE_IRQ - if (!irq_to_desc(j)) { - seq_printf(p, " %u", per_irq_sum); - continue; - } -#endif for_each_possible_cpu(i) per_irq_sum += kstat_irqs_cpu(j, i); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6c4c2c69449f..145c2d3e5e01 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1753,6 +1753,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *inode) { struct super_block *sb; + struct reiserfs_iget_args args; INITIALIZE_PATH(path_to_key); struct cpu_key key; struct item_head ih; @@ -1780,6 +1781,14 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, err = -ENOMEM; goto out_bad_inode; } + args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); + memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); + args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); + if (insert_inode_locked4(inode, args.objectid, + reiserfs_find_actor, &args) < 0) { + err = -EINVAL; + goto out_bad_inode; + } if (old_format_only(sb)) /* not a perfect generation count, as object ids can be reused, but ** this is as good as reiserfs can do right now. @@ -1859,13 +1868,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, } else { inode2sd(&sd, inode, inode->i_size); } - // these do not go to on-disk stat data - inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid); - // store in in-core inode the key of stat data and version all // object items will have (directory items will have old offset // format, other new objects will consist of new items) - memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode)) set_inode_item_key_version(inode, KEY_FORMAT_3_5); else @@ -1929,7 +1934,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, reiserfs_mark_inode_private(inode); } - insert_inode_hash(inode); reiserfs_update_sd(th, inode); reiserfs_check_path(&path_to_key); @@ -1956,6 +1960,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_inserted_sd: inode->i_nlink = 0; th->t_trans_id = 0; /* so the caller can't use this handle later */ + unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ /* If we were inheriting an ACL, we need to release the lock so that * iput doesn't deadlock in reiserfs_delete_xattrs. The locking diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 4f322e5ed840..738967f6c8ee 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -646,6 +646,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -653,6 +654,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, reiserfs_update_inode_transaction(dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -727,11 +729,13 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -812,6 +816,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = journal_end(&th, dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } @@ -819,6 +824,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) reiserfs_update_sd(&th, dir); d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: if (locked) @@ -1096,11 +1102,13 @@ static int reiserfs_symlink(struct inode *parent_dir, err = journal_end(&th, parent_dir->i_sb, jbegin_count); if (err) retval = err; + unlock_new_inode(inode); iput(inode); goto out_failed; } d_instantiate(dentry, inode); + unlock_new_inode(inode); retval = journal_end(&th, parent_dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock(parent_dir->i_sb); diff --git a/fs/seq_file.c b/fs/seq_file.c index 16c211558c22..99d8b8cfc9b7 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -389,8 +389,14 @@ char *mangle_path(char *s, char *p, char *esc) } EXPORT_SYMBOL(mangle_path); -/* - * return the absolute path of 'dentry' residing in mount 'mnt'. +/** + * seq_path - seq_file interface to print a pathname + * @m: the seq_file handle + * @path: the struct path to print + * @esc: set of characters to escape in the output + * + * return the absolute path of 'path', as represented by the + * dentry / mnt pair in the path parameter. */ int seq_path(struct seq_file *m, struct path *path, char *esc) { diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index df0d435baa48..3d81bf58dae2 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/vfs.h> +#include <linux/namei.h> #include <asm/byteorder.h> #include "sysv.h" @@ -163,8 +164,11 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) if (inode->i_blocks) { inode->i_op = &sysv_symlink_inode_operations; inode->i_mapping->a_ops = &sysv_aops; - } else + } else { inode->i_op = &sysv_fast_symlink_inode_operations; + nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size, + sizeof(SYSV_I(inode)->i_data) - 1); + } } else init_special_inode(inode, inode->i_mode, rdev); } diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 4a18f084cc42..0e5e54d82924 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -32,18 +32,15 @@ #include "ubifs.h" #include <linux/writeback.h> -#include <asm/div64.h> +#include <linux/math64.h> /* * When pessimistic budget calculations say that there is no enough space, * UBIFS starts writing back dirty inodes and pages, doing garbage collection, - * or committing. The below constants define maximum number of times UBIFS + * or committing. The below constant defines maximum number of times UBIFS * repeats the operations. */ -#define MAX_SHRINK_RETRIES 8 -#define MAX_GC_RETRIES 4 -#define MAX_CMT_RETRIES 2 -#define MAX_NOSPC_RETRIES 1 +#define MAX_MKSPC_RETRIES 3 /* * The below constant defines amount of dirty pages which should be written @@ -52,30 +49,6 @@ #define NR_TO_WRITE 16 /** - * struct retries_info - information about re-tries while making free space. - * @prev_liability: previous liability - * @shrink_cnt: how many times the liability was shrinked - * @shrink_retries: count of liability shrink re-tries (increased when - * liability does not shrink) - * @try_gc: GC should be tried first - * @gc_retries: how many times GC was run - * @cmt_retries: how many times commit has been done - * @nospc_retries: how many times GC returned %-ENOSPC - * - * Since we consider budgeting to be the fast-path, and this structure has to - * be allocated on stack and zeroed out, we make it smaller using bit-fields. - */ -struct retries_info { - long long prev_liability; - unsigned int shrink_cnt; - unsigned int shrink_retries:5; - unsigned int try_gc:1; - unsigned int gc_retries:4; - unsigned int cmt_retries:3; - unsigned int nospc_retries:1; -}; - -/** * shrink_liability - write-back some dirty pages/inodes. * @c: UBIFS file-system description object * @nr_to_write: how many dirty pages to write-back @@ -147,9 +120,25 @@ static int run_gc(struct ubifs_info *c) } /** + * get_liability - calculate current liability. + * @c: UBIFS file-system description object + * + * This function calculates and returns current UBIFS liability, i.e. the + * amount of bytes UBIFS has "promised" to write to the media. + */ +static long long get_liability(struct ubifs_info *c) +{ + long long liab; + + spin_lock(&c->space_lock); + liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; + spin_unlock(&c->space_lock); + return liab; +} + +/** * make_free_space - make more free space on the file-system. * @c: UBIFS file-system description object - * @ri: information about previous invocations of this function * * This function is called when an operation cannot be budgeted because there * is supposedly no free space. But in most cases there is some free space: @@ -165,87 +154,42 @@ static int run_gc(struct ubifs_info *c) * Returns %-ENOSPC if it couldn't do more free space, and other negative error * codes on failures. */ -static int make_free_space(struct ubifs_info *c, struct retries_info *ri) +static int make_free_space(struct ubifs_info *c) { - int err; - - /* - * If we have some dirty pages and inodes (liability), try to write - * them back unless this was tried too many times without effect - * already. - */ - if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) { - long long liability; - - spin_lock(&c->space_lock); - liability = c->budg_idx_growth + c->budg_data_growth + - c->budg_dd_growth; - spin_unlock(&c->space_lock); + int err, retries = 0; + long long liab1, liab2; - if (ri->prev_liability >= liability) { - /* Liability does not shrink, next time try GC then */ - ri->shrink_retries += 1; - if (ri->gc_retries < MAX_GC_RETRIES) - ri->try_gc = 1; - dbg_budg("liability did not shrink: retries %d of %d", - ri->shrink_retries, MAX_SHRINK_RETRIES); - } + do { + liab1 = get_liability(c); + /* + * We probably have some dirty pages or inodes (liability), try + * to write them back. + */ + dbg_budg("liability %lld, run write-back", liab1); + shrink_liability(c, NR_TO_WRITE); - dbg_budg("force write-back (count %d)", ri->shrink_cnt); - shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt); + liab2 = get_liability(c); + if (liab2 < liab1) + return -EAGAIN; - ri->prev_liability = liability; - ri->shrink_cnt += 1; - return -EAGAIN; - } + dbg_budg("new liability %lld (not shrinked)", liab2); - /* - * Try to run garbage collector unless it was already tried too many - * times. - */ - if (ri->gc_retries < MAX_GC_RETRIES) { - ri->gc_retries += 1; - dbg_budg("run GC, retries %d of %d", - ri->gc_retries, MAX_GC_RETRIES); - - ri->try_gc = 0; + /* Liability did not shrink again, try GC */ + dbg_budg("Run GC"); err = run_gc(c); if (!err) return -EAGAIN; - if (err == -EAGAIN) { - dbg_budg("GC asked to commit"); - err = ubifs_run_commit(c); - if (err) - return err; - return -EAGAIN; - } - - if (err != -ENOSPC) - return err; - - /* - * GC could not make any progress. If this is the first time, - * then it makes sense to try to commit, because it might make - * some dirty space. - */ - dbg_budg("GC returned -ENOSPC, retries %d", - ri->nospc_retries); - if (ri->nospc_retries >= MAX_NOSPC_RETRIES) + if (err != -EAGAIN && err != -ENOSPC) + /* Some real error happened */ return err; - ri->nospc_retries += 1; - } - /* Neither GC nor write-back helped, try to commit */ - if (ri->cmt_retries < MAX_CMT_RETRIES) { - ri->cmt_retries += 1; - dbg_budg("run commit, retries %d of %d", - ri->cmt_retries, MAX_CMT_RETRIES); + dbg_budg("Run commit (retries %d)", retries); err = ubifs_run_commit(c); if (err) return err; - return -EAGAIN; - } + } while (retries++ < MAX_MKSPC_RETRIES); + return -ENOSPC; } @@ -258,8 +202,8 @@ static int make_free_space(struct ubifs_info *c, struct retries_info *ri) */ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) { - int ret; - uint64_t idx_size; + int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; + long long idx_size; idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; @@ -271,23 +215,16 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) * pair, nor similarly the two variables for the new index size, so we * have to do this costly 64-bit division on fast-path. */ - if (do_div(idx_size, c->leb_size - c->max_idx_node_sz)) - ret = idx_size + 1; - else - ret = idx_size; + idx_size += eff_leb_size - 1; + idx_lebs = div_u64(idx_size, eff_leb_size); /* * The index head is not available for the in-the-gaps method, so add an * extra LEB to compensate. */ - ret += 1; - /* - * At present the index needs at least 2 LEBs: one for the index head - * and one for in-the-gaps method (which currently does not cater for - * the index head and so excludes it from consideration). - */ - if (ret < 2) - ret = 2; - return ret; + idx_lebs += 1; + if (idx_lebs < MIN_INDEX_LEBS) + idx_lebs = MIN_INDEX_LEBS; + return idx_lebs; } /** @@ -530,8 +467,7 @@ static int calc_dd_growth(const struct ubifs_info *c, int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) { int uninitialized_var(cmt_retries), uninitialized_var(wb_retries); - int err, idx_growth, data_growth, dd_growth; - struct retries_info ri; + int err, idx_growth, data_growth, dd_growth, retried = 0; ubifs_assert(req->new_page <= 1); ubifs_assert(req->dirtied_page <= 1); @@ -549,7 +485,6 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) if (!data_growth && !dd_growth) return 0; idx_growth = calc_idx_growth(c, req); - memset(&ri, 0, sizeof(struct retries_info)); again: spin_lock(&c->space_lock); @@ -587,12 +522,17 @@ again: return err; } - err = make_free_space(c, &ri); + err = make_free_space(c); + cond_resched(); if (err == -EAGAIN) { dbg_budg("try again"); - cond_resched(); goto again; } else if (err == -ENOSPC) { + if (!retried) { + retried = 1; + dbg_budg("-ENOSPC, but anyway try once again"); + goto again; + } dbg_budg("FS is full, -ENOSPC"); c->nospace = 1; if (can_use_rp(c) || c->rp_size == 0) @@ -712,9 +652,9 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, * user-space. User-space application tend to expect that if the file-system * (e.g., via the 'statfs()' call) reports that it has N bytes available, they * are able to write a file of size N. UBIFS attaches node headers to each data - * node and it has to write indexind nodes as well. This introduces additional - * overhead, and UBIFS it has to report sligtly less free space to meet the - * above expectetion. + * node and it has to write indexing nodes as well. This introduces additional + * overhead, and UBIFS has to report slightly less free space to meet the above + * expectations. * * This function assumes free space is made up of uncompressed data nodes and * full index nodes (one per data node, tripled because we always allow enough @@ -723,7 +663,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, * Note, the calculation is pessimistic, which means that most of the time * UBIFS reports less space than it actually has. */ -long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) +long long ubifs_reported_space(const struct ubifs_info *c, long long free) { int divisor, factor, f; @@ -737,7 +677,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) * of data nodes, f - fanout. Because effective UBIFS fanout is twice * as less than maximum fanout, we assume that each data node * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. - * Note, the multiplier 3 is because UBIFS reseves thrice as more space + * Note, the multiplier 3 is because UBIFS reserves thrice as more space * for the index. */ f = c->fanout > 3 ? c->fanout >> 1 : 2; @@ -745,8 +685,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) divisor = UBIFS_MAX_DATA_NODE_SZ; divisor += (c->max_idx_node_sz * 3) / (f - 1); free *= factor; - do_div(free, divisor); - return free; + return div_u64(free, divisor); } /** @@ -756,10 +695,10 @@ long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) * This function calculates amount of free space to report to user-space. * * Because UBIFS may introduce substantial overhead (the index, node headers, - * alighment, wastage at the end of eraseblocks, etc), it cannot report real + * alignment, wastage at the end of eraseblocks, etc), it cannot report real * amount of free flash space it has (well, because not all dirty space is - * reclamable, UBIFS does not actually know the real amount). If UBIFS did so, - * it would bread user expectetion about what free space is. Users seem to + * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, + * it would bread user expectations about what free space is. Users seem to * accustomed to assume that if the file-system reports N bytes of free space, * they would be able to fit a file of N bytes to the FS. This almost works for * traditional file-systems, because they have way less overhead than UBIFS. @@ -771,18 +710,9 @@ long long ubifs_get_free_space(struct ubifs_info *c) long long available, outstanding, free; spin_lock(&c->space_lock); - min_idx_lebs = ubifs_calc_min_idx_lebs(c); + min_idx_lebs = c->min_idx_lebs; + ubifs_assert(min_idx_lebs == ubifs_calc_min_idx_lebs(c)); outstanding = c->budg_data_growth + c->budg_dd_growth; - - /* - * Force the amount available to the total size reported if the used - * space is zero. - */ - if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { - spin_unlock(&c->space_lock); - return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; - } - available = ubifs_calc_available(c, min_idx_lebs); /* diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index b49884c8c10e..f3a7945527fb 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -470,12 +470,12 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) { struct ubifs_idx_node *idx; int lnum, offs, len, err = 0; + struct ubifs_debug_info *d = c->dbg; - c->old_zroot = *zroot; - - lnum = c->old_zroot.lnum; - offs = c->old_zroot.offs; - len = c->old_zroot.len; + d->old_zroot = *zroot; + lnum = d->old_zroot.lnum; + offs = d->old_zroot.offs; + len = d->old_zroot.len; idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); if (!idx) @@ -485,8 +485,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) if (err) goto out; - c->old_zroot_level = le16_to_cpu(idx->level); - c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); + d->old_zroot_level = le16_to_cpu(idx->level); + d->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); out: kfree(idx); return err; @@ -509,6 +509,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) { int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; int first = 1, iip; + struct ubifs_debug_info *d = c->dbg; union ubifs_key lower_key, upper_key, l_key, u_key; unsigned long long uninitialized_var(last_sqnum); struct ubifs_idx_node *idx; @@ -525,9 +526,9 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) UBIFS_IDX_NODE_SZ; /* Start at the old zroot */ - lnum = c->old_zroot.lnum; - offs = c->old_zroot.offs; - len = c->old_zroot.len; + lnum = d->old_zroot.lnum; + offs = d->old_zroot.offs; + len = d->old_zroot.len; iip = 0; /* @@ -560,11 +561,11 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) if (first) { first = 0; /* Check root level and sqnum */ - if (le16_to_cpu(idx->level) != c->old_zroot_level) { + if (le16_to_cpu(idx->level) != d->old_zroot_level) { err = 2; goto out_dump; } - if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { + if (le64_to_cpu(idx->ch.sqnum) != d->old_zroot_sqnum) { err = 3; goto out_dump; } diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index a0ada596b17c..11e4132f314a 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c @@ -33,7 +33,7 @@ /* Fake description object for the "none" compressor */ static struct ubifs_compressor none_compr = { .compr_type = UBIFS_COMPR_NONE, - .name = "no compression", + .name = "none", .capi_name = "", }; @@ -43,13 +43,13 @@ static DEFINE_MUTEX(lzo_mutex); static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, .comp_mutex = &lzo_mutex, - .name = "LZO", + .name = "lzo", .capi_name = "lzo", }; #else static struct ubifs_compressor lzo_compr = { .compr_type = UBIFS_COMPR_LZO, - .name = "LZO", + .name = "lzo", }; #endif @@ -108,7 +108,7 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, if (compr->comp_mutex) mutex_lock(compr->comp_mutex); err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf, - out_len); + (unsigned int *)out_len); if (compr->comp_mutex) mutex_unlock(compr->comp_mutex); if (unlikely(err)) { @@ -119,10 +119,10 @@ void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, } /* - * Presently, we just require that compression results in less data, - * rather than any defined minimum compression ratio or amount. + * If the data compressed only slightly, it is better to leave it + * uncompressed to improve read speed. */ - if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8)) + if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) goto no_compr; return; @@ -172,7 +172,7 @@ int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, if (compr->decomp_mutex) mutex_lock(compr->decomp_mutex); err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf, - out_len); + (unsigned int *)out_len); if (compr->decomp_mutex) mutex_unlock(compr->decomp_mutex); if (err) @@ -244,7 +244,7 @@ out_lzo: /** * ubifs_compressors_exit - de-initialize UBIFS compressors. */ -void __exit ubifs_compressors_exit(void) +void ubifs_compressors_exit(void) { compr_exit(&lzo_compr); compr_exit(&zlib_compr); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 510ffa0bbda4..792c5a16c182 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -32,6 +32,8 @@ #include "ubifs.h" #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/debugfs.h> +#include <linux/math64.h> #ifdef CONFIG_UBIFS_FS_DEBUG @@ -596,7 +598,9 @@ void dbg_dump_budg(struct ubifs_info *c) struct rb_node *rb; struct ubifs_bud *bud; struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + ubifs_assert(spin_is_locked(&c->space_lock)); spin_lock(&dbg_lock); printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, @@ -629,6 +633,17 @@ void dbg_dump_budg(struct ubifs_info *c) printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", idx_gc->lnum, idx_gc->unmap); printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ + available = ubifs_calc_available(c, c->min_idx_lebs); + outstanding = c->budg_data_growth + c->budg_dd_growth; + if (available > outstanding) + free = ubifs_reported_space(c, available - outstanding); + else + free = 0; + printk(KERN_DEBUG "Budgeting predictions:\n"); + printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); spin_unlock(&dbg_lock); } @@ -645,7 +660,8 @@ void dbg_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); + printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", + current->pid); ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); @@ -656,6 +672,8 @@ void dbg_dump_lprops(struct ubifs_info *c) dbg_dump_lprop(c, &lp); } + printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", + current->pid); } void dbg_dump_lpt_info(struct ubifs_info *c) @@ -663,6 +681,7 @@ void dbg_dump_lpt_info(struct ubifs_info *c) int i; spin_lock(&dbg_lock); + printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); @@ -684,7 +703,8 @@ void dbg_dump_lpt_info(struct ubifs_info *c) printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); printk(KERN_DEBUG "\tLPT head is at %d:%d\n", c->nhead_lnum, c->nhead_offs); - printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); + printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", + c->ltab_lnum, c->ltab_offs); if (c->big_lpt) printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", c->lsave_lnum, c->lsave_offs); @@ -703,9 +723,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) if (dbg_failure_mode) return; - printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); - - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { ubifs_err("scan error %d", (int)PTR_ERR(sleb)); return; @@ -721,6 +741,8 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) dbg_dump_node(c, snod->node); } + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); ubifs_scan_destroy(sleb); return; } @@ -768,7 +790,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", + printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; @@ -777,6 +799,7 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) "flags %d\n", i, lprops->lnum, lprops->hpos, lprops->free, lprops->dirty, lprops->flags); } + printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); } void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, @@ -784,7 +807,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); + printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", @@ -803,7 +826,7 @@ void dbg_dump_tnc(struct ubifs_info *c) int level; printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); + printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; printk(KERN_DEBUG "== Level %d ==\n", level); @@ -815,8 +838,7 @@ void dbg_dump_tnc(struct ubifs_info *c) dbg_dump_znode(c, znode); znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); } - - printk(KERN_DEBUG "\n"); + printk(KERN_DEBUG "(pid %d) finish dumping TNC tree\n", current->pid); } static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, @@ -992,8 +1014,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, zbr1->offs, DBGKEY(&key)); dbg_err("but it should have key %s according to tnc", DBGKEY(&zbr1->key)); - dbg_dump_node(c, dent1); - goto out_free; + dbg_dump_node(c, dent1); + goto out_free; } key_read(c, &dent2->key, &key); @@ -1002,8 +1024,8 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, zbr1->offs, DBGKEY(&key)); dbg_err("but it should have key %s according to tnc", DBGKEY(&zbr2->key)); - dbg_dump_node(c, dent2); - goto out_free; + dbg_dump_node(c, dent2); + goto out_free; } nlen1 = le16_to_cpu(dent1->nlen); @@ -1020,9 +1042,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_err("bad order of colliding key %s", DBGKEY(&key)); - dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); dbg_dump_node(c, dent1); - dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); + ubifs_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs); dbg_dump_node(c, dent2); out_free: @@ -2097,13 +2119,13 @@ static int simple_rand(void) return (next >> 16) & 32767; } -void dbg_failure_mode_registration(struct ubifs_info *c) +static void failure_mode_init(struct ubifs_info *c) { struct failure_mode_info *fmi; fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); if (!fmi) { - dbg_err("Failed to register failure mode - no memory"); + ubifs_err("Failed to register failure mode - no memory"); return; } fmi->c = c; @@ -2112,7 +2134,7 @@ void dbg_failure_mode_registration(struct ubifs_info *c) spin_unlock(&fmi_lock); } -void dbg_failure_mode_deregistration(struct ubifs_info *c) +static void failure_mode_exit(struct ubifs_info *c) { struct failure_mode_info *fmi, *tmp; @@ -2146,42 +2168,44 @@ static int in_failure_mode(struct ubi_volume_desc *desc) struct ubifs_info *c = dbg_find_info(desc); if (c && dbg_failure_mode) - return c->failure_mode; + return c->dbg->failure_mode; return 0; } static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) { struct ubifs_info *c = dbg_find_info(desc); + struct ubifs_debug_info *d; if (!c || !dbg_failure_mode) return 0; - if (c->failure_mode) + d = c->dbg; + if (d->failure_mode) return 1; - if (!c->fail_cnt) { + if (!d->fail_cnt) { /* First call - decide delay to failure */ if (chance(1, 2)) { unsigned int delay = 1 << (simple_rand() >> 11); if (chance(1, 2)) { - c->fail_delay = 1; - c->fail_timeout = jiffies + + d->fail_delay = 1; + d->fail_timeout = jiffies + msecs_to_jiffies(delay); dbg_rcvry("failing after %ums", delay); } else { - c->fail_delay = 2; - c->fail_cnt_max = delay; + d->fail_delay = 2; + d->fail_cnt_max = delay; dbg_rcvry("failing after %u calls", delay); } } - c->fail_cnt += 1; + d->fail_cnt += 1; } /* Determine if failure delay has expired */ - if (c->fail_delay == 1) { - if (time_before(jiffies, c->fail_timeout)) + if (d->fail_delay == 1) { + if (time_before(jiffies, d->fail_timeout)) return 0; - } else if (c->fail_delay == 2) - if (c->fail_cnt++ < c->fail_cnt_max) + } else if (d->fail_delay == 2) + if (d->fail_cnt++ < d->fail_cnt_max) return 0; if (lnum == UBIFS_SB_LNUM) { if (write) { @@ -2239,7 +2263,7 @@ static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) dbg_rcvry("failing in bud LEB %d commit not running", lnum); } ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); - c->failure_mode = 1; + d->failure_mode = 1; dump_stack(); return 1; } @@ -2344,4 +2368,181 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) return 0; } +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function initializes debugging-related data for the file system. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ + c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); + if (!c->dbg) + return -ENOMEM; + + c->dbg->buf = vmalloc(c->leb_size); + if (!c->dbg->buf) + goto out; + + failure_mode_init(c); + return 0; + +out: + kfree(c->dbg); + return -ENOMEM; +} + +/** + * ubifs_debugging_exit - free debugging data. + * @c: UBIFS file-system description object + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ + failure_mode_exit(c); + vfree(c->dbg->buf); + kfree(c->dbg); +} + +/* + * Root directory for UBIFS stuff in debugfs. Contains sub-directories which + * contain the stuff specific to particular file-system mounts. + */ +static struct dentry *debugfs_rootdir; + +/** + * dbg_debugfs_init - initialize debugfs file-system. + * + * UBIFS uses debugfs file-system to expose various debugging knobs to + * user-space. This function creates "ubifs" directory in the debugfs + * file-system. Returns zero in case of success and a negative error code in + * case of failure. + */ +int dbg_debugfs_init(void) +{ + debugfs_rootdir = debugfs_create_dir("ubifs", NULL); + if (IS_ERR(debugfs_rootdir)) { + int err = PTR_ERR(debugfs_rootdir); + ubifs_err("cannot create \"ubifs\" debugfs directory, " + "error %d\n", err); + return err; + } + + return 0; +} + +/** + * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. + */ +void dbg_debugfs_exit(void) +{ + debugfs_remove(debugfs_rootdir); +} + +static int open_debugfs_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t write_debugfs_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; + + if (file->f_path.dentry == d->dump_lprops) + dbg_dump_lprops(c); + else if (file->f_path.dentry == d->dump_budg) { + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + } else if (file->f_path.dentry == d->dump_tnc) { + mutex_lock(&c->tnc_mutex); + dbg_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); + } else + return -EINVAL; + + *ppos += count; + return count; +} + +static const struct file_operations debugfs_fops = { + .open = open_debugfs_file, + .write = write_debugfs_file, + .owner = THIS_MODULE, +}; + +/** + * dbg_debugfs_init_fs - initialize debugfs for UBIFS instance. + * @c: UBIFS file-system description object + * + * This function creates all debugfs files for this instance of UBIFS. Returns + * zero in case of success and a negative error code in case of failure. + * + * Note, the only reason we have not merged this function with the + * 'ubifs_debugging_init()' function is because it is better to initialize + * debugfs interfaces at the very end of the mount process, and remove them at + * the very beginning of the mount process. + */ +int dbg_debugfs_init_fs(struct ubifs_info *c) +{ + int err; + const char *fname; + struct dentry *dent; + struct ubifs_debug_info *d = c->dbg; + + sprintf(d->debugfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); + d->debugfs_dir = debugfs_create_dir(d->debugfs_dir_name, + debugfs_rootdir); + if (IS_ERR(d->debugfs_dir)) { + err = PTR_ERR(d->debugfs_dir); + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + d->debugfs_dir_name, err); + goto out; + } + + fname = "dump_lprops"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_lprops = dent; + + fname = "dump_budg"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_budg = dent; + + fname = "dump_tnc"; + dent = debugfs_create_file(fname, S_IWUGO, d->debugfs_dir, c, + &debugfs_fops); + if (IS_ERR(dent)) + goto out_remove; + d->dump_tnc = dent; + + return 0; + +out_remove: + err = PTR_ERR(dent); + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", + fname, err); + debugfs_remove_recursive(d->debugfs_dir); +out: + return err; +} + +/** + * dbg_debugfs_exit_fs - remove all debugfs files. + * @c: UBIFS file-system description object + */ +void dbg_debugfs_exit_fs(struct ubifs_info *c) +{ + debugfs_remove_recursive(c->dbg->debugfs_dir); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 33d6b95071e4..9820d6999f7e 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -25,7 +25,56 @@ #ifdef CONFIG_UBIFS_FS_DEBUG -#define UBIFS_DBG(op) op +/** + * ubifs_debug_info - per-FS debugging information. + * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker + * @chk_lpt_lebs: used by LPT tree size checker + * @new_nhead_offs: used by LPT tree size checker + * @new_ihead_lnum: used by debugging to check ihead_lnum + * @new_ihead_offs: used by debugging to check ihead_offs + * + * debugfs_dir_name: name of debugfs directory containing this file-system's + * files + * debugfs_dir: direntry object of the file-system debugfs directory + * dump_lprops: "dump lprops" debugfs knob + * dump_budg: "dump budgeting information" debugfs knob + * dump_tnc: "dump TNC" debugfs knob + */ +struct ubifs_debug_info { + void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; + int failure_mode; + int fail_delay; + unsigned long fail_timeout; + unsigned int fail_cnt; + unsigned int fail_cnt_max; + long long chk_lpt_sz; + long long chk_lpt_sz2; + long long chk_lpt_wastage; + int chk_lpt_lebs; + int new_nhead_offs; + int new_ihead_lnum; + int new_ihead_offs; + + char debugfs_dir_name[100]; + struct dentry *debugfs_dir; + struct dentry *dump_lprops; + struct dentry *dump_budg; + struct dentry *dump_tnc; +}; #define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ @@ -211,14 +260,18 @@ extern unsigned int ubifs_msg_flags; extern unsigned int ubifs_chk_flags; extern unsigned int ubifs_tst_flags; -/* Dump functions */ +int ubifs_debugging_init(struct ubifs_info *c); +void ubifs_debugging_exit(struct ubifs_info *c); +/* Dump functions */ const char *dbg_ntype(int type); const char *dbg_cstate(int cmt_state); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); void dbg_dump_node(const struct ubifs_info *c, const void *node); +void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, + int offs); void dbg_dump_budget_req(const struct ubifs_budget_req *req); void dbg_dump_lstats(const struct ubifs_lp_stats *lst); void dbg_dump_budg(struct ubifs_info *c); @@ -233,9 +286,9 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip); void dbg_dump_tnc(struct ubifs_info *c); void dbg_dump_index(struct ubifs_info *c); +void dbg_dump_lpt_lebs(const struct ubifs_info *c); /* Checking helper functions */ - typedef int (*dbg_leaf_callback)(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *priv); typedef int (*dbg_znode_callback)(struct ubifs_info *c, @@ -274,9 +327,6 @@ int dbg_force_in_the_gaps(void); #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) -void dbg_failure_mode_registration(struct ubifs_info *c); -void dbg_failure_mode_deregistration(struct ubifs_info *c); - #ifndef UBIFS_DBG_PRESERVE_UBI #define ubi_leb_read dbg_leb_read @@ -318,9 +368,13 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); } -#else /* !CONFIG_UBIFS_FS_DEBUG */ +/* Debugfs-related stuff */ +int dbg_debugfs_init(void); +void dbg_debugfs_exit(void); +int dbg_debugfs_init_fs(struct ubifs_info *c); +void dbg_debugfs_exit_fs(struct ubifs_info *c); -#define UBIFS_DBG(op) +#else /* !CONFIG_UBIFS_FS_DEBUG */ /* Use "if (0)" to make compiler check arguments even if debugging is off */ #define ubifs_assert(expr) do { \ @@ -360,23 +414,28 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_lpt_info(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) +#define ubifs_debugging_init(c) 0 +#define ubifs_debugging_exit(c) ({}) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_lpt_info(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) +#define dbg_dump_lpt_lebs(c) ({}) #define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 @@ -396,9 +455,11 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 #define dbg_failure_mode 0 -#define dbg_failure_mode_registration(c) ({}) -#define dbg_failure_mode_deregistration(c) ({}) -#endif /* !CONFIG_UBIFS_FS_DEBUG */ +#define dbg_debugfs_init() 0 +#define dbg_debugfs_exit() +#define dbg_debugfs_init_fs(c) 0 +#define dbg_debugfs_exit_fs(c) 0 +#endif /* !CONFIG_UBIFS_FS_DEBUG */ #endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 2624411d9758..fe82d2464d46 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -72,8 +72,8 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, return err; } - ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); - + ubifs_assert(le64_to_cpu(dn->ch.sqnum) > + ubifs_inode(inode)->creat_sqnum); len = le32_to_cpu(dn->size); if (len <= 0 || len > UBIFS_BLOCK_SIZE) goto dump; @@ -254,7 +254,7 @@ static int write_begin_slow(struct address_space *mapping, } if (!PageUptodate(page)) { - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) SetPageChecked(page); else { err = do_readpage(page); @@ -444,7 +444,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, if (!PageUptodate(page)) { /* The page is not loaded from the flash */ - if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 5e82cffe9695..6db7a6be6c97 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -154,6 +154,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GETFLAGS: flags = ubifs2ioctl(ubifs_inode(inode)->flags); + dbg_gen("get flags: %#x, i_flags %#x", flags, inode->i_flags); return put_user(flags, (int __user *) arg); case FS_IOC_SETFLAGS: { @@ -176,6 +177,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) err = mnt_want_write(file->f_path.mnt); if (err) return err; + dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); err = setflags(inode, flags); mnt_drop_write(file->f_path.mnt); return err; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index f91b745908ea..10ae25b7d1db 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -704,7 +704,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, data->size = cpu_to_le32(len); zero_data_node_unused(data); - if (!(ui->flags && UBIFS_COMPR_FL)) + if (!(ui->flags & UBIFS_COMPR_FL)) /* Compression is disabled for this inode */ compr_type = UBIFS_COMPR_NONE; else @@ -1220,7 +1220,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, data_key_init(c, &key, inum, blk); bit = old_size & (UBIFS_BLOCK_SIZE - 1); - blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1); + blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0 : 1); data_key_init(c, &to_key, inum, blk); err = ubifs_tnc_remove_range(c, &key, &to_key); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 3f1f16bc25c9..efb3430a2581 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -38,6 +38,22 @@ #define __UBIFS_KEY_H__ /** + * key_mask_hash - mask a valid hash value. + * @val: value to be masked + * + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This + * function makes sure the reserved values are not used. + */ +static inline uint32_t key_mask_hash(uint32_t hash) +{ + hash &= UBIFS_S_KEY_HASH_MASK; + if (unlikely(hash <= 2)) + hash += 3; + return hash; +} + +/** * key_r5_hash - R5 hash function (borrowed from reiserfs). * @s: direntry name * @len: name length @@ -54,16 +70,7 @@ static inline uint32_t key_r5_hash(const char *s, int len) str++; } - a &= UBIFS_S_KEY_HASH_MASK; - - /* - * We use hash values as offset in directories, so values %0 and %1 are - * reserved for "." and "..". %2 is reserved for "end of readdir" - * marker. - */ - if (unlikely(a >= 0 && a <= 2)) - a += 3; - return a; + return key_mask_hash(a); } /** @@ -77,10 +84,7 @@ static inline uint32_t key_test_hash(const char *str, int len) len = min_t(uint32_t, len, 4); memcpy(&a, str, len); - a &= UBIFS_S_KEY_HASH_MASK; - if (unlikely(a >= 0 && a <= 2)) - a += 3; - return a; + return key_mask_hash(a); } /** diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index f27176e9b70d..dfd2bcece27a 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -520,13 +520,13 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) * @flags: new flags * @idx_gc_cnt: change to the count of idx_gc list * - * This function changes LEB properties. This function does not change a LEB - * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC. + * This function changes LEB properties (@free, @dirty or @flag). However, the + * property which has the %LPROPS_NC value is not changed. Returns a pointer to + * the updated LEB properties on success and a negative error code on failure. * - * This function returns a pointer to the updated LEB properties on success - * and a negative error code on failure. N.B. the LEB properties may have had to - * be copied (due to COW) and consequently the pointer returned may not be the - * same as the pointer passed. + * Note, the LEB properties may have had to be copied (due to COW) and + * consequently the pointer returned may not be the same as the pointer + * passed. */ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, const struct ubifs_lprops *lp, @@ -1088,7 +1088,7 @@ static int scan_check_cb(struct ubifs_info *c, } } - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { /* * After an unclean unmount, empty and freeable LEBs diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index db8bd0e518b2..b2792e84d245 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -36,15 +36,16 @@ * can be written into a single eraseblock. In that case, garbage collection * consists of just writing the whole table, which therefore makes all other * eraseblocks reusable. In the case of the big model, dirty eraseblocks are - * selected for garbage collection, which consists are marking the nodes in + * selected for garbage collection, which consists of marking the clean nodes in * that LEB as dirty, and then only the dirty nodes are written out. Also, in * the case of the big model, a table of LEB numbers is saved so that the entire * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first * mounted. */ -#include <linux/crc16.h> #include "ubifs.h" +#include <linux/crc16.h> +#include <linux/math64.h> /** * do_calc_lpt_geom - calculate sizes for the LPT area. @@ -135,15 +136,13 @@ static void do_calc_lpt_geom(struct ubifs_info *c) int ubifs_calc_lpt_geom(struct ubifs_info *c) { int lebs_needed; - uint64_t sz; + long long sz; do_calc_lpt_geom(c); /* Verify that lpt_lebs is big enough */ sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ - sz += c->leb_size - 1; - do_div(sz, c->leb_size); - lebs_needed = sz; + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); if (lebs_needed > c->lpt_lebs) { ubifs_err("too few LPT LEBs"); return -EINVAL; @@ -156,7 +155,6 @@ int ubifs_calc_lpt_geom(struct ubifs_info *c) } c->check_lpt_free = c->big_lpt; - return 0; } @@ -176,7 +174,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, int *big_lpt) { int i, lebs_needed; - uint64_t sz; + long long sz; /* Start by assuming the minimum number of LPT LEBs */ c->lpt_lebs = UBIFS_MIN_LPT_LEBS; @@ -203,9 +201,7 @@ static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs, /* Now check there are enough LPT LEBs */ for (i = 0; i < 64 ; i++) { sz = c->lpt_sz * 4; /* Allow 4 times the size */ - sz += c->leb_size - 1; - do_div(sz, c->leb_size); - lebs_needed = sz; + lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); if (lebs_needed > c->lpt_lebs) { /* Not enough LPT LEBs so try again with more */ c->lpt_lebs = lebs_needed; @@ -558,7 +554,7 @@ static int calc_nnode_num(int row, int col) * This function calculates and returns the nnode number based on the parent's * nnode number and the index in parent. */ -static int calc_nnode_num_from_parent(struct ubifs_info *c, +static int calc_nnode_num_from_parent(const struct ubifs_info *c, struct ubifs_nnode *parent, int iip) { int num, shft; @@ -583,7 +579,7 @@ static int calc_nnode_num_from_parent(struct ubifs_info *c, * This function calculates and returns the pnode number based on the parent's * nnode number and the index in parent. */ -static int calc_pnode_num_from_parent(struct ubifs_info *c, +static int calc_pnode_num_from_parent(const struct ubifs_info *c, struct ubifs_nnode *parent, int iip) { int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; @@ -966,7 +962,7 @@ static int check_lpt_type(uint8_t **addr, int *pos, int type) * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_pnode(struct ubifs_info *c, void *buf, +static int unpack_pnode(const struct ubifs_info *c, void *buf, struct ubifs_pnode *pnode) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; @@ -996,15 +992,15 @@ static int unpack_pnode(struct ubifs_info *c, void *buf, } /** - * unpack_nnode - unpack a nnode. + * ubifs_unpack_nnode - unpack a nnode. * @c: UBIFS file-system description object * @buf: buffer containing packed nnode to unpack * @nnode: nnode structure to fill * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_nnode(struct ubifs_info *c, void *buf, - struct ubifs_nnode *nnode) +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1036,7 +1032,7 @@ static int unpack_nnode(struct ubifs_info *c, void *buf, * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_ltab(struct ubifs_info *c, void *buf) +static int unpack_ltab(const struct ubifs_info *c, void *buf) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1068,7 +1064,7 @@ static int unpack_ltab(struct ubifs_info *c, void *buf) * * This function returns %0 on success and a negative error code on failure. */ -static int unpack_lsave(struct ubifs_info *c, void *buf) +static int unpack_lsave(const struct ubifs_info *c, void *buf) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int i, pos = 0, err; @@ -1096,7 +1092,7 @@ static int unpack_lsave(struct ubifs_info *c, void *buf) * * This function returns %0 on success and a negative error code on failure. */ -static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, +static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, struct ubifs_nnode *parent, int iip) { int i, lvl, max_offs; @@ -1140,7 +1136,7 @@ static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode, * * This function returns %0 on success and a negative error code on failure. */ -static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, +static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip) { int i; @@ -1174,7 +1170,8 @@ static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, * This function calculates the LEB numbers for the LEB properties it contains * based on the pnode number. */ -static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode) +static void set_pnode_lnum(const struct ubifs_info *c, + struct ubifs_pnode *pnode) { int i, lnum; @@ -1227,7 +1224,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); if (err) goto out; - err = unpack_nnode(c, buf, nnode); + err = ubifs_unpack_nnode(c, buf, nnode); if (err) goto out; } @@ -1816,7 +1813,7 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, c->nnode_sz); if (err) return ERR_PTR(err); - err = unpack_nnode(c, buf, nnode); + err = ubifs_unpack_nnode(c, buf, nnode); if (err) return ERR_PTR(err); } diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index a41434b42785..96ca95707175 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -320,6 +320,8 @@ no_space: dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return err; } @@ -546,8 +548,10 @@ static int write_cnodes(struct ubifs_info *c) no_space: ubifs_err("LPT out of space mismatch"); dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " - "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); + "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return err; } @@ -749,7 +753,7 @@ static void lpt_tgc_start(struct ubifs_info *c) * LPT trivial garbage collection is where a LPT LEB contains only dirty and * free space and so may be reused as soon as the next commit is completed. * This function is called after the commit is completed (master node has been - * written) and unmaps LPT LEBs that were marked for trivial GC. + * written) and un-maps LPT LEBs that were marked for trivial GC. */ static int lpt_tgc_end(struct ubifs_info *c) { @@ -1025,7 +1029,7 @@ static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num, * @c: UBIFS file-system description object * @node_type: LPT node type */ -static int get_lpt_node_len(struct ubifs_info *c, int node_type) +static int get_lpt_node_len(const struct ubifs_info *c, int node_type) { switch (node_type) { case UBIFS_LPT_NNODE: @@ -1046,7 +1050,7 @@ static int get_lpt_node_len(struct ubifs_info *c, int node_type) * @buf: buffer * @len: length of buffer */ -static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) +static int get_pad_len(const struct ubifs_info *c, uint8_t *buf, int len) { int offs, pad_len; @@ -1063,7 +1067,8 @@ static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len) * @buf: buffer * @node_num: node number is returned here */ -static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) +static int get_lpt_node_type(const struct ubifs_info *c, uint8_t *buf, + int *node_num) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int pos = 0, node_type; @@ -1081,7 +1086,7 @@ static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num) * * This function returns %1 if the buffer contains a node or %0 if it does not. */ -static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) +static int is_a_node(const struct ubifs_info *c, uint8_t *buf, int len) { uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; int pos = 0, node_type, node_len; @@ -1105,7 +1110,6 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) return 1; } - /** * lpt_gc_lnum - garbage collect a LPT LEB. * @c: UBIFS file-system description object @@ -1463,7 +1467,7 @@ void ubifs_lpt_free(struct ubifs_info *c, int wr_only) #ifdef CONFIG_UBIFS_FS_DEBUG /** - * dbg_is_all_ff - determine if a buffer contains only 0xff bytes. + * dbg_is_all_ff - determine if a buffer contains only 0xFF bytes. * @buf: buffer * @len: buffer length */ @@ -1488,7 +1492,7 @@ static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs) struct ubifs_nnode *nnode; int hght; - /* Entire tree is in memory so first_nnode / next_nnode are ok */ + /* Entire tree is in memory so first_nnode / next_nnode are OK */ nnode = first_nnode(c, &hght); for (; nnode; nnode = next_nnode(c, nnode, &hght)) { struct ubifs_nbranch *branch; @@ -1602,7 +1606,10 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) { int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; int ret; - void *buf = c->dbg_buf; + void *buf = c->dbg->buf; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; dbg_lp("LEB %d", lnum); err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); @@ -1704,6 +1711,9 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) long long free = 0; int i; + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + for (i = 0; i < c->lpt_lebs; i++) { if (c->ltab[i].tgc || c->ltab[i].cmt) continue; @@ -1716,6 +1726,8 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) dbg_err("LPT space error: free %lld lpt_sz %lld", free, c->lpt_sz); dbg_dump_lpt_info(c); + dbg_dump_lpt_lebs(c); + dump_stack(); return -EINVAL; } return 0; @@ -1731,15 +1743,19 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) */ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) { + struct ubifs_debug_info *d = c->dbg; long long chk_lpt_sz, lpt_sz; int err = 0; + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + switch (action) { case 0: - c->chk_lpt_sz = 0; - c->chk_lpt_sz2 = 0; - c->chk_lpt_lebs = 0; - c->chk_lpt_wastage = 0; + d->chk_lpt_sz = 0; + d->chk_lpt_sz2 = 0; + d->chk_lpt_lebs = 0; + d->chk_lpt_wastage = 0; if (c->dirty_pn_cnt > c->pnode_cnt) { dbg_err("dirty pnodes %d exceed max %d", c->dirty_pn_cnt, c->pnode_cnt); @@ -1752,35 +1768,35 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) } return err; case 1: - c->chk_lpt_sz += len; + d->chk_lpt_sz += len; return 0; case 2: - c->chk_lpt_sz += len; - c->chk_lpt_wastage += len; - c->chk_lpt_lebs += 1; + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; + d->chk_lpt_lebs += 1; return 0; case 3: chk_lpt_sz = c->leb_size; - chk_lpt_sz *= c->chk_lpt_lebs; + chk_lpt_sz *= d->chk_lpt_lebs; chk_lpt_sz += len - c->nhead_offs; - if (c->chk_lpt_sz != chk_lpt_sz) { + if (d->chk_lpt_sz != chk_lpt_sz) { dbg_err("LPT wrote %lld but space used was %lld", - c->chk_lpt_sz, chk_lpt_sz); + d->chk_lpt_sz, chk_lpt_sz); err = -EINVAL; } - if (c->chk_lpt_sz > c->lpt_sz) { + if (d->chk_lpt_sz > c->lpt_sz) { dbg_err("LPT wrote %lld but lpt_sz is %lld", - c->chk_lpt_sz, c->lpt_sz); + d->chk_lpt_sz, c->lpt_sz); err = -EINVAL; } - if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { + if (d->chk_lpt_sz2 && d->chk_lpt_sz != d->chk_lpt_sz2) { dbg_err("LPT layout size %lld but wrote %lld", - c->chk_lpt_sz, c->chk_lpt_sz2); + d->chk_lpt_sz, d->chk_lpt_sz2); err = -EINVAL; } - if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { + if (d->chk_lpt_sz2 && d->new_nhead_offs != len) { dbg_err("LPT new nhead offs: expected %d was %d", - c->new_nhead_offs, len); + d->new_nhead_offs, len); err = -EINVAL; } lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; @@ -1788,26 +1804,146 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) lpt_sz += c->ltab_sz; if (c->big_lpt) lpt_sz += c->lsave_sz; - if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { + if (d->chk_lpt_sz - d->chk_lpt_wastage > lpt_sz) { dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", - c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); + d->chk_lpt_sz, d->chk_lpt_wastage, lpt_sz); err = -EINVAL; } - if (err) + if (err) { dbg_dump_lpt_info(c); - c->chk_lpt_sz2 = c->chk_lpt_sz; - c->chk_lpt_sz = 0; - c->chk_lpt_wastage = 0; - c->chk_lpt_lebs = 0; - c->new_nhead_offs = len; + dbg_dump_lpt_lebs(c); + dump_stack(); + } + d->chk_lpt_sz2 = d->chk_lpt_sz; + d->chk_lpt_sz = 0; + d->chk_lpt_wastage = 0; + d->chk_lpt_lebs = 0; + d->new_nhead_offs = len; return err; case 4: - c->chk_lpt_sz += len; - c->chk_lpt_wastage += len; + d->chk_lpt_sz += len; + d->chk_lpt_wastage += len; return 0; default: return -EINVAL; } } +/** + * dbg_dump_lpt_leb - dump an LPT LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to dump + * + * This function dumps an LEB from LPT area. Nodes in this area are very + * different to nodes in the main area (e.g., they do not have common headers, + * they do not have 8-byte alignments, etc), so we have a separate function to + * dump LPT area LEBs. Note, LPT has to be locked by the caller. + */ +static void dump_lpt_leb(const struct ubifs_info *c, int lnum) +{ + int err, len = c->leb_size, node_type, node_num, node_len, offs; + void *buf = c->dbg->buf; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); + return; + } + while (1) { + offs = c->leb_size - len; + if (!is_a_node(c, buf, len)) { + int pad_len; + + pad_len = get_pad_len(c, buf, len); + if (pad_len) { + printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); + buf += pad_len; + len -= pad_len; + continue; + } + if (len) + printk(KERN_DEBUG "LEB %d:%d, free %d bytes\n", + lnum, offs, len); + break; + } + + node_type = get_lpt_node_type(c, buf, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { + node_len = c->pnode_sz; + if (c->big_lpt) + printk(KERN_DEBUG "LEB %d:%d, pnode num %d\n", + lnum, offs, node_num); + else + printk(KERN_DEBUG "LEB %d:%d, pnode\n", + lnum, offs); + break; + } + case UBIFS_LPT_NNODE: + { + int i; + struct ubifs_nnode nnode; + + node_len = c->nnode_sz; + if (c->big_lpt) + printk(KERN_DEBUG "LEB %d:%d, nnode num %d, ", + lnum, offs, node_num); + else + printk(KERN_DEBUG "LEB %d:%d, nnode, ", + lnum, offs); + err = ubifs_unpack_nnode(c, buf, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + printk("%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); + if (i != UBIFS_LPT_FANOUT - 1) + printk(", "); + } + printk("\n"); + break; + } + case UBIFS_LPT_LTAB: + node_len = c->ltab_sz; + printk(KERN_DEBUG "LEB %d:%d, ltab\n", + lnum, offs); + break; + case UBIFS_LPT_LSAVE: + node_len = c->lsave_sz; + printk(KERN_DEBUG "LEB %d:%d, lsave len\n", lnum, offs); + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); + return; + } + + buf += node_len; + len -= node_len; + } + + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); +} + +/** + * dbg_dump_lpt_lebs - dump LPT lebs. + * @c: UBIFS file-system description object + * + * This function dumps all LPT LEBs. The caller has to make sure the LPT is + * locked. + */ +void dbg_dump_lpt_lebs(const struct ubifs_info *c) +{ + int i; + + printk(KERN_DEBUG "(pid %d) start dumping all LPT LEBs\n", + current->pid); + for (i = 0; i < c->lpt_lebs; i++) + dump_lpt_leb(c, i + c->lpt_first); + printk(KERN_DEBUG "(pid %d) finish dumping all LPT LEBs\n", + current->pid); +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 9bd5a43d4526..9e6f403f170e 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -899,7 +899,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { struct ubifs_scan_leb *sleb; - sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 21f7d047c306..ce42a7b0ca5a 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -144,7 +144,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) /* * If the replay order was perfect the dirty space would now be * zero. The order is not perfect because the the journal heads - * race with eachother. This is not a problem but is does mean + * race with each other. This is not a problem but is does mean * that the dirty space may temporarily exceed c->leb_size * during the replay. */ @@ -656,7 +656,7 @@ out_dump: * @dirty: amount of dirty space from padding and deletion nodes * * This function inserts a reference node to the replay tree and returns zero - * in case of success ort a negative error code in case of failure. + * in case of success or a negative error code in case of failure. */ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, unsigned long long sqnum, int free, int dirty) @@ -883,7 +883,7 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) * This means that we reached end of log and now * look to the older log data, which was already * committed but the eraseblock was not erased (UBIFS - * only unmaps it). So this basically means we have to + * only un-maps it). So this basically means we have to * exit with "end of log" code. */ err = 1; @@ -1062,6 +1062,15 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; + /* + * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable + * to roughly estimate index growth. Things like @c->min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. + */ + c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); + c->budg_uncommitted_idx *= c->max_idx_node_sz; + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 0f392351dc5a..e070c643d1bb 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -28,6 +28,7 @@ #include "ubifs.h" #include <linux/random.h> +#include <linux/math64.h> /* * Default journal size in logical eraseblocks as a percent of total @@ -80,7 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c) int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first; int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0; int min_leb_cnt = UBIFS_MIN_LEB_CNT; - uint64_t tmp64, main_bytes; + long long tmp64, main_bytes; __le64 tmp_le64; /* Some functions called from here depend on the @c->key_len filed */ @@ -160,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c) if (!sup) return -ENOMEM; - tmp64 = (uint64_t)max_buds * c->leb_size; + tmp64 = (long long)max_buds * c->leb_size; if (big_lpt) sup_flags |= UBIFS_FLG_BIGLPT; @@ -179,14 +180,16 @@ static int create_default_filesystem(struct ubifs_info *c) sup->fanout = cpu_to_le32(DEFAULT_FANOUT); sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); - sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); + if (c->mount_opts.override_compr) + sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); + else + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); generate_random_uuid(sup->uuid); - main_bytes = (uint64_t)main_lebs * c->leb_size; - tmp64 = main_bytes * DEFAULT_RP_PERCENT; - do_div(tmp64, 100); + main_bytes = (long long)main_lebs * c->leb_size; + tmp64 = div_u64(main_bytes * DEFAULT_RP_PERCENT, 100); if (tmp64 > DEFAULT_MAX_RP_SIZE) tmp64 = DEFAULT_MAX_RP_SIZE; sup->rp_size = cpu_to_le64(tmp64); @@ -582,16 +585,15 @@ int ubifs_read_superblock(struct ubifs_info *c) c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT; c->fanout = le32_to_cpu(sup->fanout); c->lsave_cnt = le32_to_cpu(sup->lsave_cnt); - c->default_compr = le16_to_cpu(sup->default_compr); c->rp_size = le64_to_cpu(sup->rp_size); c->rp_uid = le32_to_cpu(sup->rp_uid); c->rp_gid = le32_to_cpu(sup->rp_gid); sup_flags = le32_to_cpu(sup->flags); + if (!c->mount_opts.override_compr) + c->default_compr = le16_to_cpu(sup->default_compr); c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); - memcpy(&c->uuid, &sup->uuid, 16); - c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); /* Automatically increase file system size to the maximum size */ diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index d80b2aef42b6..0d7564b95f8e 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -34,6 +34,8 @@ #include <linux/parser.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/math64.h> +#include <linux/writeback.h> #include "ubifs.h" /* @@ -417,39 +419,54 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) else if (c->mount_opts.chk_data_crc == 1) seq_printf(s, ",no_chk_data_crc"); + if (c->mount_opts.override_compr) { + seq_printf(s, ",compr="); + seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); + } + return 0; } static int ubifs_sync_fs(struct super_block *sb, int wait) { + int i, err; struct ubifs_info *c = sb->s_fs_info; - int i, ret = 0, err; - long long bud_bytes; - - if (c->jheads) { - for (i = 0; i < c->jhead_cnt; i++) { - err = ubifs_wbuf_sync(&c->jheads[i].wbuf); - if (err && !ret) - ret = err; - } + struct writeback_control wbc = { + .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_HOLD, + .range_start = 0, + .range_end = LLONG_MAX, + .nr_to_write = LONG_MAX, + }; + + if (sb->s_flags & MS_RDONLY) + return 0; - /* Commit the journal unless it has too little data */ - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - if (bud_bytes > c->leb_size) { - err = ubifs_run_commit(c); - if (err) - return err; - } + /* + * Synchronize write buffers, because 'ubifs_run_commit()' does not + * do this if it waits for an already running commit. + */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + return err; } /* - * We ought to call sync for c->ubi but it does not have one. If it had - * it would in turn call mtd->sync, however mtd operations are - * synchronous anyway, so we don't lose any sleep here. + * VFS calls '->sync_fs()' before synchronizing all dirty inodes and + * pages, so synchronize them first, then commit the journal. Strictly + * speaking, it is not necessary to commit the journal here, + * synchronizing write-buffers would be enough. But committing makes + * UBIFS free space predictions much more accurate, so we want to let + * the user be able to get more accurate results of 'statfs()' after + * they synchronize the file system. */ - return ret; + generic_sync_sb_inodes(sb, &wbc); + + err = ubifs_run_commit(c); + if (err) + return err; + + return ubi_sync(c->vi.ubi_num); } /** @@ -596,7 +613,7 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) } /* - * init_constants_late - initialize UBIFS constants. + * init_constants_sb - initialize UBIFS constants. * @c: UBIFS file-system description object * * This is a helper function which initializes various UBIFS constants after @@ -604,10 +621,10 @@ static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad) * makes sure they are all right. Returns zero in case of success and a * negative error code in case of failure. */ -static int init_constants_late(struct ubifs_info *c) +static int init_constants_sb(struct ubifs_info *c) { int tmp, err; - uint64_t tmp64; + long long tmp64; c->main_bytes = (long long)c->main_lebs * c->leb_size; c->max_znode_sz = sizeof(struct ubifs_znode) + @@ -634,9 +651,8 @@ static int init_constants_late(struct ubifs_info *c) * Make sure that the log is large enough to fit reference nodes for * all buds plus one reserved LEB. */ - tmp64 = c->max_bud_bytes; - tmp = do_div(tmp64, c->leb_size); - c->max_bud_cnt = tmp64 + !!tmp; + tmp64 = c->max_bud_bytes + c->leb_size - 1; + c->max_bud_cnt = div_u64(tmp64, c->leb_size); tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); tmp /= c->leb_size; tmp += 1; @@ -672,7 +688,7 @@ static int init_constants_late(struct ubifs_info *c) * Consequently, if the journal is too small, UBIFS will treat it as * always full. */ - tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1; + tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; if (c->bg_bud_bytes < tmp64) c->bg_bud_bytes = tmp64; if (c->max_bud_bytes < tmp64 + c->leb_size) @@ -682,6 +698,21 @@ static int init_constants_late(struct ubifs_info *c) if (err) return err; + return 0; +} + +/* + * init_constants_master - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the master node has been read. It also checks various UBIFS parameters and + * makes sure they are all right. + */ +static void init_constants_master(struct ubifs_info *c) +{ + long long tmp64; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); /* @@ -690,14 +721,13 @@ static int init_constants_late(struct ubifs_info *c) * necessary to report something for the 'statfs()' call. * * Subtract the LEB reserved for GC, the LEB which is reserved for - * deletions, and assume only one journal head is available. + * deletions, minimum LEBs for the index, and assume only one journal + * head is available. */ - tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; - tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; + tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; + tmp64 *= (long long)c->leb_size - c->leb_overhead; tmp64 = ubifs_reported_space(c, tmp64); c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; - - return 0; } /** @@ -878,6 +908,7 @@ static int check_volume_empty(struct ubifs_info *c) * Opt_no_bulk_read: disable bulk-reads * Opt_chk_data_crc: check CRCs when reading data nodes * Opt_no_chk_data_crc: do not check CRCs when reading data nodes + * Opt_override_compr: override default compressor * Opt_err: just end of array marker */ enum { @@ -887,6 +918,7 @@ enum { Opt_no_bulk_read, Opt_chk_data_crc, Opt_no_chk_data_crc, + Opt_override_compr, Opt_err, }; @@ -897,6 +929,7 @@ static const match_table_t tokens = { {Opt_no_bulk_read, "no_bulk_read"}, {Opt_chk_data_crc, "chk_data_crc"}, {Opt_no_chk_data_crc, "no_chk_data_crc"}, + {Opt_override_compr, "compr=%s"}, {Opt_err, NULL}, }; @@ -950,6 +983,28 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, c->mount_opts.chk_data_crc = 1; c->no_chk_data_crc = 1; break; + case Opt_override_compr: + { + char *name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strcmp(name, "none")) + c->mount_opts.compr_type = UBIFS_COMPR_NONE; + else if (!strcmp(name, "lzo")) + c->mount_opts.compr_type = UBIFS_COMPR_LZO; + else if (!strcmp(name, "zlib")) + c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; + else { + ubifs_err("unknown compressor \"%s\"", name); + kfree(name); + return -EINVAL; + } + kfree(name); + c->mount_opts.override_compr = 1; + c->default_compr = c->mount_opts.compr_type; + break; + } default: ubifs_err("unrecognized mount option \"%s\" " "or missing value", p); @@ -1019,6 +1074,30 @@ again: } /** + * check_free_space - check if there is enough free space to mount. + * @c: UBIFS file-system description object + * + * This function makes sure UBIFS has enough free space to be mounted in + * read/write mode. UBIFS must always have some free space to allow deletions. + */ +static int check_free_space(struct ubifs_info *c) +{ + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { + ubifs_err("insufficient free space to mount in read/write mode"); + dbg_dump_budg(c); + dbg_dump_lprops(c); + /* + * We return %-EINVAL instead of %-ENOSPC because it seems to + * be the closest error code mentioned in the mount function + * documentation. + */ + return -EINVAL; + } + return 0; +} + +/** * mount_ubifs - mount UBIFS file-system. * @c: UBIFS file-system description object * @@ -1039,11 +1118,9 @@ static int mount_ubifs(struct ubifs_info *c) if (err) return err; -#ifdef CONFIG_UBIFS_FS_DEBUG - c->dbg_buf = vmalloc(c->leb_size); - if (!c->dbg_buf) - return -ENOMEM; -#endif + err = ubifs_debugging_init(c); + if (err) + return err; err = check_volume_empty(c); if (err) @@ -1100,27 +1177,25 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; /* - * Make sure the compressor which is set as the default on in the - * superblock was actually compiled in. + * Make sure the compressor which is set as default in the superblock + * or overridden by mount options is actually compiled in. */ if (!ubifs_compr_present(c->default_compr)) { - ubifs_warn("'%s' compressor is set by superblock, but not " - "compiled in", ubifs_compr_name(c->default_compr)); - c->default_compr = UBIFS_COMPR_NONE; + ubifs_err("'compressor \"%s\" is not compiled in", + ubifs_compr_name(c->default_compr)); + goto out_free; } - dbg_failure_mode_registration(c); - - err = init_constants_late(c); + err = init_constants_sb(c); if (err) - goto out_dereg; + goto out_free; sz = ALIGN(c->max_idx_node_sz, c->min_io_size); sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); c->cbuf = kmalloc(sz, GFP_NOFS); if (!c->cbuf) { err = -ENOMEM; - goto out_dereg; + goto out_free; } sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); @@ -1145,6 +1220,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_master; + init_constants_master(c); + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; @@ -1183,12 +1260,9 @@ static int mount_ubifs(struct ubifs_info *c) if (!mounted_read_only) { int lnum; - /* Check for enough free space */ - if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { - ubifs_err("insufficient available space"); - err = -EINVAL; + err = check_free_space(c); + if (err) goto out_orphans; - } /* Check for enough log space */ lnum = c->lhead_lnum + 1; @@ -1232,6 +1306,10 @@ static int mount_ubifs(struct ubifs_info *c) } } + err = dbg_debugfs_init_fs(c); + if (err) + goto out_infos; + err = dbg_check_filesystem(c); if (err) goto out_infos; @@ -1283,8 +1361,20 @@ static int mount_ubifs(struct ubifs_info *c) dbg_msg("tree fanout: %d", c->fanout); dbg_msg("reserved GC LEB: %d", c->gc_lnum); dbg_msg("first main LEB: %d", c->main_first); + dbg_msg("max. znode size %d", c->max_znode_sz); + dbg_msg("max. index node size %d", c->max_idx_node_sz); + dbg_msg("node sizes: data %zu, inode %zu, dentry %zu", + UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); + dbg_msg("node sizes: trun %zu, sb %zu, master %zu", + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); + dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, + UBIFS_MAX_DENT_NODE_SZ); dbg_msg("dead watermark: %d", c->dead_wm); dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); x = (long long)c->main_lebs * c->dark_wm; dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)", x, x >> 10, x >> 20); @@ -1320,14 +1410,12 @@ out_wbufs: free_wbufs(c); out_cbuf: kfree(c->cbuf); -out_dereg: - dbg_failure_mode_deregistration(c); out_free: kfree(c->bu.buf); vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); - UBIFS_DBG(vfree(c->dbg_buf)); + ubifs_debugging_exit(c); return err; } @@ -1345,6 +1433,7 @@ static void ubifs_umount(struct ubifs_info *c) dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); + dbg_debugfs_exit_fs(c); spin_lock(&ubifs_infos_lock); list_del(&c->infos_list); spin_unlock(&ubifs_infos_lock); @@ -1364,8 +1453,7 @@ static void ubifs_umount(struct ubifs_info *c) vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); - UBIFS_DBG(vfree(c->dbg_buf)); - dbg_failure_mode_deregistration(c); + ubifs_debugging_exit(c); } /** @@ -1387,12 +1475,9 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->always_chk_crc = 1; - /* Check for enough free space */ - if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { - ubifs_err("insufficient available space"); - err = -EINVAL; + err = check_free_space(c); + if (err) goto out; - } if (c->old_leb_cnt != c->leb_cnt) { struct ubifs_sb_node *sup; @@ -1515,20 +1600,24 @@ out: * @c: UBIFS file-system description object * * This function is called during un-mounting and re-mounting, and it commits - * the journal unless the "fast unmount" mode is enabled. It also avoids - * committing the journal if it contains too few data. + * the journal unless the "fast unmount" mode is enabled. */ static void commit_on_unmount(struct ubifs_info *c) { - if (!c->fast_unmount) { - long long bud_bytes; + struct super_block *sb = c->vfs_sb; + long long bud_bytes; - spin_lock(&c->buds_lock); - bud_bytes = c->bud_bytes; - spin_unlock(&c->buds_lock); - if (bud_bytes > c->leb_size) - ubifs_run_commit(c); - } + /* + * This function is called before the background thread is stopped, so + * we may race with ongoing commit, which means we have to take + * @c->bud_lock to access @c->bud_bytes. + */ + spin_lock(&c->buds_lock); + bud_bytes = c->bud_bytes; + spin_unlock(&c->buds_lock); + + if (!c->fast_unmount && !(sb->s_flags & MS_RDONLY) && bud_bytes) + ubifs_run_commit(c); } /** @@ -1849,7 +1938,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) goto out_iput; mutex_unlock(&c->umount_mutex); - return 0; out_iput: @@ -1955,7 +2043,7 @@ static void ubifs_kill_sb(struct super_block *sb) * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()' * in order to be outside BKL. */ - if (sb->s_root && !(sb->s_flags & MS_RDONLY)) + if (sb->s_root) commit_on_unmount(c); /* The un-mount routine is actually done in put_super() */ generic_shutdown_super(sb); @@ -2021,6 +2109,14 @@ static int __init ubifs_init(void) BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); /* + * We use 2 bit wide bit-fields to store compression type, which should + * be amended if more compressors are added. The bit-fields are: + * @compr_type in 'struct ubifs_inode', @default_compr in + * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. + */ + BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); + + /* * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. */ @@ -2049,11 +2145,17 @@ static int __init ubifs_init(void) err = ubifs_compressors_init(); if (err) + goto out_shrinker; + + err = dbg_debugfs_init(); + if (err) goto out_compr; return 0; out_compr: + ubifs_compressors_exit(); +out_shrinker: unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); out_reg: @@ -2068,6 +2170,7 @@ static void __exit ubifs_exit(void) ubifs_assert(list_empty(&ubifs_infos)); ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0); + dbg_debugfs_exit(); ubifs_compressors_exit(); unregister_shrinker(&ubifs_shrinker_info); kmem_cache_destroy(ubifs_inode_slab); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 6eef5344a145..f7e36f545527 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2245,12 +2245,11 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, if (found) { /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, - znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } zbr = &znode->zbranch[n]; lnc_free(zbr); @@ -2317,11 +2316,11 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } if (found == 1) { @@ -2627,11 +2626,11 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } /* Remove all keys in range except the first */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 8ac76b1c2d55..fde8d127c768 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -553,8 +553,8 @@ static int layout_in_empty_space(struct ubifs_info *c) } #ifdef CONFIG_UBIFS_FS_DEBUG - c->new_ihead_lnum = lnum; - c->new_ihead_offs = buf_offs; + c->dbg->new_ihead_lnum = lnum; + c->dbg->new_ihead_offs = buf_offs; #endif return 0; @@ -802,8 +802,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) * budgeting subsystem to assume the index is already committed, * even though it is not. */ + ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); c->old_idx_sz = c->calc_idx_sz; c->budg_uncommitted_idx = 0; + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); mutex_unlock(&c->tnc_mutex); @@ -1002,7 +1004,8 @@ static int write_index(struct ubifs_info *c) } #ifdef CONFIG_UBIFS_FS_DEBUG - if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { + if (lnum != c->dbg->new_ihead_lnum || + buf_offs != c->dbg->new_ihead_offs) { ubifs_err("inconsistent ihead"); return -EINVAL; } diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 0b378042a3a2..b25fc36cf72f 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -51,6 +51,13 @@ */ #define UBIFS_MIN_COMPR_LEN 128 +/* + * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes + * shorter than uncompressed data length, UBIFS preferes to leave this data + * node uncompress, because it'll be read faster. + */ +#define UBIFS_MIN_COMPRESS_DIFF 64 + /* Root inode number */ #define UBIFS_ROOT_INO 1 diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 46b172560a06..fc2a4cc66d03 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -63,6 +63,14 @@ #define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL #define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL +/* + * Minimum amount of LEBs reserved for the index. At present the index needs at + * least 2 LEBs: one for the index head and one for in-the-gaps method (which + * currently does not cater for the index head and so excludes it from + * consideration). + */ +#define MIN_INDEX_LEBS 2 + /* Minimum amount of data UBIFS writes to the flash */ #define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) @@ -386,12 +394,12 @@ struct ubifs_inode { unsigned int dirty:1; unsigned int xattr:1; unsigned int bulk_read:1; + unsigned int compr_type:2; struct mutex ui_mutex; spinlock_t ui_lock; loff_t synced_i_size; loff_t ui_size; int flags; - int compr_type; pgoff_t last_page_read; pgoff_t read_in_a_row; int data_len; @@ -419,7 +427,7 @@ struct ubifs_unclean_leb { * * LPROPS_UNCAT: not categorized * LPROPS_DIRTY: dirty > 0, not index - * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index + * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index * LPROPS_FREE: free > 0, not empty, not index * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs * LPROPS_EMPTY: LEB is empty, not taken @@ -473,8 +481,8 @@ struct ubifs_lprops { struct ubifs_lpt_lprops { int free; int dirty; - unsigned tgc : 1; - unsigned cmt : 1; + unsigned tgc:1; + unsigned cmt:1; }; /** @@ -482,24 +490,26 @@ struct ubifs_lpt_lprops { * @empty_lebs: number of empty LEBs * @taken_empty_lebs: number of taken LEBs * @idx_lebs: number of indexing LEBs - * @total_free: total free space in bytes - * @total_dirty: total dirty space in bytes - * @total_used: total used space in bytes (includes only data LEBs) - * @total_dead: total dead space in bytes (includes only data LEBs) - * @total_dark: total dark space in bytes (includes only data LEBs) + * @total_free: total free space in bytes (includes all LEBs) + * @total_dirty: total dirty space in bytes (includes all LEBs) + * @total_used: total used space in bytes (does not include index LEBs) + * @total_dead: total dead space in bytes (does not include index LEBs) + * @total_dark: total dark space in bytes (does not include index LEBs) + * + * The @taken_empty_lebs field counts the LEBs that are in the transient state + * of having been "taken" for use but not yet written to. @taken_empty_lebs is + * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be + * used by itself (in which case 'unused_lebs' would be a better name). In the + * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained + * by GC, but unlike other empty LEBs that are "taken", it may not be written + * straight away (i.e. before the next commit start or unmount), so either + * @gc_lnum must be specially accounted for, or the current approach followed + * i.e. count it under @taken_empty_lebs. * - * N.B. total_dirty and total_used are different to other total_* fields, - * because they account _all_ LEBs, not just data LEBs. + * @empty_lebs includes @taken_empty_lebs. * - * 'taken_empty_lebs' counts the LEBs that are in the transient state of having - * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed - * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used - * by itself (in which case 'unused_lebs' would be a better name). In the case - * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC, - * but unlike other empty LEBs that are 'taken', it may not be written straight - * away (i.e. before the next commit start or unmount), so either gc_lnum must - * be specially accounted for, or the current approach followed i.e. count it - * under 'taken_empty_lebs'. + * @total_used, @total_dead and @total_dark fields do not account indexing + * LEBs. */ struct ubifs_lp_stats { int empty_lebs; @@ -893,15 +903,25 @@ struct ubifs_orphan { /** * struct ubifs_mount_opts - UBIFS-specific mount options information. * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) - * @bulk_read: enable bulk-reads - * @chk_data_crc: check CRCs when reading data nodes + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disabe, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + * (%0 default, %1 disabe, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + * superblock compressor, %1 - override and use compressor + * specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + * (%UBIFS_COMPR_NONE, etc) */ struct ubifs_mount_opts { unsigned int unmount_mode:2; unsigned int bulk_read:2; unsigned int chk_data_crc:2; + unsigned int override_compr:1; + unsigned int compr_type:2; }; +struct ubifs_debug_info; + /** * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). @@ -946,6 +966,7 @@ struct ubifs_mount_opts { * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and * @calc_idx_sz @@ -963,8 +984,6 @@ struct ubifs_mount_opts { * @ileb_nxt: next pre-allocated index LEBs * @old_idx: tree of index nodes obsoleted since the last commit start * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c - * @new_ihead_lnum: used by debugging to check ihead_lnum - * @new_ihead_offs: used by debugging to check ihead_offs * * @mst_node: master node * @mst_offs: offset of valid master node @@ -986,7 +1005,6 @@ struct ubifs_mount_opts { * @main_lebs: count of LEBs in the main area * @main_first: first LEB of the main area * @main_bytes: main area size in bytes - * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) * * @key_hash_type: type of the key hash * @key_hash: direntry key hash function @@ -1149,15 +1167,7 @@ struct ubifs_mount_opts { * @always_chk_crc: always check CRCs (while mounting and remounting rw) * @mount_opts: UBIFS-specific mount options * - * @dbg_buf: a buffer of LEB size used for debugging purposes - * @old_zroot: old index root - used by 'dbg_check_old_index()' - * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' - * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' - * @failure_mode: failure mode for recovery testing - * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls - * @fail_timeout: time in jiffies when delay of failure mode expires - * @fail_cnt: current number of calls to failure mode I/O functions - * @fail_cnt_max: number of calls by which to delay failure mode + * @dbg: debugging-related information */ struct ubifs_info { struct super_block *vfs_sb; @@ -1196,6 +1206,7 @@ struct ubifs_info { unsigned int big_lpt:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; + unsigned int default_compr:2; struct mutex tnc_mutex; struct ubifs_zbranch zroot; @@ -1212,10 +1223,6 @@ struct ubifs_info { int ileb_nxt; struct rb_root old_idx; int *bottom_up_buf; -#ifdef CONFIG_UBIFS_FS_DEBUG - int new_ihead_lnum; - int new_ihead_offs; -#endif struct ubifs_mst_node *mst_node; int mst_offs; @@ -1237,7 +1244,6 @@ struct ubifs_info { int main_lebs; int main_first; long long main_bytes; - int default_compr; uint8_t key_hash_type; uint32_t (*key_hash)(const char *str, int len); @@ -1315,8 +1321,8 @@ struct ubifs_info { void *sbuf; struct list_head idx_gc; int idx_gc_cnt; - volatile int gc_seq; - volatile int gced_lnum; + int gc_seq; + int gced_lnum; struct list_head infos_list; struct mutex umount_mutex; @@ -1391,21 +1397,7 @@ struct ubifs_info { struct ubifs_mount_opts mount_opts; #ifdef CONFIG_UBIFS_FS_DEBUG - void *dbg_buf; - struct ubifs_zbranch old_zroot; - int old_zroot_level; - unsigned long long old_zroot_sqnum; - int failure_mode; - int fail_delay; - unsigned long fail_timeout; - unsigned int fail_cnt; - unsigned int fail_cnt_max; - long long chk_lpt_sz; - long long chk_lpt_sz2; - long long chk_lpt_wastage; - int chk_lpt_lebs; - int new_nhead_lnum; - int new_nhead_offs; + struct ubifs_debug_info *dbg; #endif }; @@ -1505,7 +1497,7 @@ void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, long long ubifs_get_free_space(struct ubifs_info *c); int ubifs_calc_min_idx_lebs(struct ubifs_info *c); void ubifs_convert_page_budget(struct ubifs_info *c); -long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); +long long ubifs_reported_space(const struct ubifs_info *c, long long free); long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); /* find.c */ @@ -1639,6 +1631,9 @@ void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); +/* Needed only in debugging code in lpt_commit.c */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, + struct ubifs_nnode *nnode); /* lpt_commit.c */ int ubifs_lpt_start_commit(struct ubifs_info *c); @@ -1714,7 +1709,7 @@ long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* compressor.c */ int __init ubifs_compressors_init(void); -void __exit ubifs_compressors_exit(void); +void ubifs_compressors_exit(void); void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, int *compr_type); int ubifs_decompress(const void *buf, int len, void *out, int *out_len, diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 54bbf6e04ee8..0e9e2bc0ee96 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -40,6 +40,9 @@ #ifndef node_to_cpumask #define node_to_cpumask(node) ((void)node, cpu_online_map) #endif +#ifndef cpumask_of_node +#define cpumask_of_node(node) ((void)node, cpu_online_mask) +#endif #ifndef node_to_first_cpu #define node_to_first_cpu(node) ((void)(node),0) #endif @@ -54,9 +57,18 @@ ) #endif +#ifndef cpumask_of_pcibus +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) +#endif + #endif /* CONFIG_NUMA */ -/* returns pointer to cpumask for specified node */ +/* + * returns pointer to cpumask for specified node + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #ifndef node_to_cpumask_ptr #define node_to_cpumask_ptr(v, node) \ diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h index c5dd66916692..b96a6d2ffbc3 100644 --- a/include/asm-m32r/smp.h +++ b/include/asm-m32r/smp.h @@ -63,8 +63,6 @@ extern volatile int cpu_2_physid[NR_CPUS]; #define raw_smp_processor_id() (current_thread_info()->cpu) extern cpumask_t cpu_callout_map; -extern cpumask_t cpu_possible_map; -extern cpumask_t cpu_present_map; static __inline__ int hard_smp_processor_id(void) { diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 3209dd46ea7d..b24ff086a662 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -31,7 +31,7 @@ struct pciserial_board { struct serial_private; struct serial_private * -pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); +pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board); void pciserial_remove_ports(struct serial_private *priv); void pciserial_suspend_ports(struct serial_private *priv); void pciserial_resume_ports(struct serial_private *priv); diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index ed3a5d473e52..cea153697ec7 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -82,13 +82,13 @@ struct clock_event_device { int shift; int rating; int irq; - cpumask_t cpumask; + const struct cpumask *cpumask; int (*set_next_event)(unsigned long evt, struct clock_event_device *); void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); void (*event_handler)(struct clock_event_device *); - void (*broadcast)(cpumask_t mask); + void (*broadcast)(const struct cpumask *mask); struct list_head list; enum clock_event_mode mode; ktime_t next_event; diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5c8351b859f0..af40f8eb86f0 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -61,3 +61,8 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) + +#define __gcc_header(x) #x +#define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) +#define gcc_header(x) _gcc_header(x) +#include gcc_header(__GNUC__) diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h index e5eb795f78a1..8005effc04f1 100644 --- a/include/linux/compiler-gcc3.h +++ b/include/linux/compiler-gcc3.h @@ -2,8 +2,9 @@ #error "Please don't include <linux/compiler-gcc3.h> directly, include <linux/compiler.h> instead." #endif -/* These definitions are for GCC v3.x. */ -#include <linux/compiler-gcc.h> +#if __GNUC_MINOR__ < 2 +# error Sorry, your compiler is too old - please upgrade it. +#endif #if __GNUC_MINOR__ >= 3 # define __used __attribute__((__used__)) diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 974f5b7bb205..09992718f9e8 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -2,8 +2,10 @@ #error "Please don't include <linux/compiler-gcc4.h> directly, include <linux/compiler.h> instead." #endif -/* These definitions are for GCC v4.x. */ -#include <linux/compiler-gcc.h> +/* GCC 4.1.[01] miscompiles __weak */ +#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1 +# error Your version of gcc miscompiles the __weak directive +#endif #define __used __attribute__((__used__)) #define __must_check __attribute__((warn_unused_result)) @@ -16,7 +18,7 @@ */ #define uninitialized_var(x) x = x -#if !(__GNUC__ == 4 && __GNUC_MINOR__ < 3) +#if __GNUC_MINOR__ >= 3 /* Mark functions as cold. gcc will assume any path leading to a call to them will be unlikely. This means a lot of manual unlikely()s are unnecessary now for any paths leading to the usual suspects diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ea7c6be354b7..d95da1020f1c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -36,12 +36,8 @@ extern void __chk_io_ptr(const volatile void __iomem *); #ifdef __KERNEL__ -#if __GNUC__ >= 4 -# include <linux/compiler-gcc4.h> -#elif __GNUC__ == 3 && __GNUC_MINOR__ >= 2 -# include <linux/compiler-gcc3.h> -#else -# error Sorry, your compiler is too old/not recognized. +#ifdef __GNUC__ +#include <linux/compiler-gcc.h> #endif #define notrace __attribute__((no_instrument_function)) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 21e1dd43e52a..d4bf52603e6b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -339,36 +339,6 @@ extern cpumask_t cpu_mask_all; #endif #define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) -#define cpumask_scnprintf(buf, len, src) \ - __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpumask_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - -#define cpumask_parse_user(ubuf, ulen, dst) \ - __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse_user(const char __user *buf, int len, - cpumask_t *dstp, int nbits) -{ - return bitmap_parse_user(buf, len, dstp->bits, nbits); -} - -#define cpulist_scnprintf(buf, len, src) \ - __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpulist_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - -#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) -static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) -{ - return bitmap_parselist(buf, dstp->bits, nbits); -} - #define cpu_remap(oldbit, old, new) \ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) static inline int __cpu_remap(int oldbit, @@ -540,9 +510,6 @@ extern cpumask_t cpu_active_map; [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } -/* This produces more efficient code. */ -#define nr_cpumask_bits NR_CPUS - #else /* NR_CPUS > BITS_PER_LONG */ #define CPU_BITS_ALL \ @@ -550,9 +517,15 @@ extern cpumask_t cpu_active_map; [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ } +#endif /* NR_CPUS > BITS_PER_LONG */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, + * not all bits may be allocated. */ #define nr_cpumask_bits nr_cpu_ids -#endif /* NR_CPUS > BITS_PER_LONG */ +#else +#define nr_cpumask_bits NR_CPUS +#endif /* verify cpu argument to cpumask_* operators */ static inline unsigned int cpumask_check(unsigned int cpu) @@ -946,6 +919,63 @@ static inline void cpumask_copy(struct cpumask *dstp, #define cpumask_of(cpu) (get_cpu_mask(cpu)) /** + * cpumask_scnprintf - print a cpumask into a string as comma-separated hex + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpumask_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpumask_parse_user - extract a cpumask from a user string + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parse_user(const char __user *buf, int len, + struct cpumask *dstp) +{ + return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); +} + +/** + * cpulist_scnprintf - print a cpumask into a string as comma-separated list + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpulist_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpulist_parse_user - extract a cpumask from a user string of ranges + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +{ + return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); +} + +/** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap * diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a37359d0bad1..c66d22487bf8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -75,14 +75,22 @@ full_name_hash(const unsigned char *name, unsigned int len) return end_name_hash(hash); } -struct dcookie_struct; - -#define DNAME_INLINE_LEN_MIN 36 +/* + * Try to keep struct dentry aligned on 64 byte cachelines (this will + * give reasonable cacheline footprint with larger lines without the + * large memory footprint increase). + */ +#ifdef CONFIG_64BIT +#define DNAME_INLINE_LEN_MIN 32 /* 192 bytes */ +#else +#define DNAME_INLINE_LEN_MIN 40 /* 128 bytes */ +#endif struct dentry { atomic_t d_count; unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ + int d_mounted; struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ /* @@ -107,10 +115,7 @@ struct dentry { struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ void *d_fsdata; /* fs-specific data */ -#ifdef CONFIG_PROFILING - struct dcookie_struct *d_cookie; /* cookie, if any */ -#endif - int d_mounted; + unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; @@ -177,6 +182,8 @@ d_iput: no no no yes #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ +#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 4aab6f12cfab..09d6c5bbdddd 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -57,8 +57,6 @@ struct files_struct { #define files_fdtable(files) (rcu_dereference((files)->fdt)) -extern struct kmem_cache *filp_cachep; - struct file_operations; struct vfsmount; struct dentry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 001ded4845b4..e2170ee21e18 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -21,7 +21,6 @@ /* Fixed constants first: */ #undef NR_OPEN -extern int sysctl_nr_open; #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 @@ -38,21 +37,13 @@ struct files_stat_struct { int nr_free_files; /* read only */ int max_files; /* tunable */ }; -extern struct files_stat_struct files_stat; -extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; int nr_unused; int dummy[5]; /* padding for sysctl ABI compatibility */ }; -extern struct inodes_stat_t inodes_stat; -extern int leases_enable, lease_break_time; - -#ifdef CONFIG_DNOTIFY -extern int dir_notify_enable; -#endif #define NR_FILE 8192 /* this can well be larger on a larger system */ @@ -330,6 +321,15 @@ extern void __init inode_init(void); extern void __init inode_init_early(void); extern void __init files_init(unsigned long); +extern struct files_stat_struct files_stat; +extern int get_max_files(void); +extern int sysctl_nr_open; +extern struct inodes_stat_t inodes_stat; +extern int leases_enable, lease_break_time; +#ifdef CONFIG_DNOTIFY +extern int dir_notify_enable; +#endif + struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); @@ -1212,7 +1212,6 @@ extern void unlock_super(struct super_block *); /* * VFS helper functions.. */ -extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); @@ -1310,7 +1309,6 @@ struct file_operations { ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); @@ -1869,7 +1867,7 @@ extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, int open_flag, int mode); -extern int may_open(struct nameidata *, int, int); +extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); extern struct file * open_exec(const char *); @@ -1904,6 +1902,8 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); +extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); +extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); extern void __iget(struct inode * inode); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 9e5a06e78d02..a97c053d3a9a 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -10,12 +10,6 @@ struct fs_struct { struct path root, pwd; }; -#define INIT_FS { \ - .count = ATOMIC_INIT(1), \ - .lock = RW_LOCK_UNLOCKED, \ - .umask = 0022, \ -} - extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); diff --git a/include/linux/generic_serial.h b/include/linux/generic_serial.h index 4cc913939817..fadff28505bb 100644 --- a/include/linux/generic_serial.h +++ b/include/linux/generic_serial.h @@ -21,7 +21,6 @@ struct real_driver { void (*enable_tx_interrupts) (void *); void (*disable_rx_interrupts) (void *); void (*enable_rx_interrupts) (void *); - int (*get_CD) (void *); void (*shutdown_port) (void*); int (*set_real_termios) (void*); int (*chars_in_buffer) (void*); diff --git a/include/linux/ide.h b/include/linux/ide.h index e99c56de7f56..db5ef8ae1ab9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -32,13 +32,6 @@ # define SUPPORT_VLB_SYNC 1 #endif -/* - * Used to indicate "no IRQ", should be a value that cannot be an IRQ - * number. - */ - -#define IDE_NO_IRQ (-1) - typedef unsigned char byte; /* used everywhere */ /* @@ -403,6 +396,7 @@ enum { * This is used for several packet commands (not for READ/WRITE commands). */ #define IDE_PC_BUFFER_SIZE 256 +#define ATAPI_WAIT_PC (60 * HZ) struct ide_atapi_pc { /* actual packet bytes */ @@ -480,53 +474,53 @@ enum { /* ide-cd */ /* Drive cannot eject the disc. */ - IDE_AFLAG_NO_EJECT = (1 << 3), + IDE_AFLAG_NO_EJECT = (1 << 1), /* Drive is a pre ATAPI 1.2 drive. */ - IDE_AFLAG_PRE_ATAPI12 = (1 << 4), + IDE_AFLAG_PRE_ATAPI12 = (1 << 2), /* TOC addresses are in BCD. */ - IDE_AFLAG_TOCADDR_AS_BCD = (1 << 5), + IDE_AFLAG_TOCADDR_AS_BCD = (1 << 3), /* TOC track numbers are in BCD. */ - IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 6), + IDE_AFLAG_TOCTRACKS_AS_BCD = (1 << 4), /* * Drive does not provide data in multiples of SECTOR_SIZE * when more than one interrupt is needed. */ - IDE_AFLAG_LIMIT_NFRAMES = (1 << 7), + IDE_AFLAG_LIMIT_NFRAMES = (1 << 5), /* Saved TOC information is current. */ - IDE_AFLAG_TOC_VALID = (1 << 9), + IDE_AFLAG_TOC_VALID = (1 << 6), /* We think that the drive door is locked. */ - IDE_AFLAG_DOOR_LOCKED = (1 << 10), + IDE_AFLAG_DOOR_LOCKED = (1 << 7), /* SET_CD_SPEED command is unsupported. */ - IDE_AFLAG_NO_SPEED_SELECT = (1 << 11), - IDE_AFLAG_VERTOS_300_SSD = (1 << 12), - IDE_AFLAG_VERTOS_600_ESD = (1 << 13), - IDE_AFLAG_SANYO_3CD = (1 << 14), - IDE_AFLAG_FULL_CAPS_PAGE = (1 << 15), - IDE_AFLAG_PLAY_AUDIO_OK = (1 << 16), - IDE_AFLAG_LE_SPEED_FIELDS = (1 << 17), + IDE_AFLAG_NO_SPEED_SELECT = (1 << 8), + IDE_AFLAG_VERTOS_300_SSD = (1 << 9), + IDE_AFLAG_VERTOS_600_ESD = (1 << 10), + IDE_AFLAG_SANYO_3CD = (1 << 11), + IDE_AFLAG_FULL_CAPS_PAGE = (1 << 12), + IDE_AFLAG_PLAY_AUDIO_OK = (1 << 13), + IDE_AFLAG_LE_SPEED_FIELDS = (1 << 14), /* ide-floppy */ /* Avoid commands not supported in Clik drive */ - IDE_AFLAG_CLIK_DRIVE = (1 << 19), + IDE_AFLAG_CLIK_DRIVE = (1 << 15), /* Requires BH algorithm for packets */ - IDE_AFLAG_ZIP_DRIVE = (1 << 20), + IDE_AFLAG_ZIP_DRIVE = (1 << 16), /* Supports format progress report */ - IDE_AFLAG_SRFP = (1 << 22), + IDE_AFLAG_SRFP = (1 << 17), /* ide-tape */ - IDE_AFLAG_IGNORE_DSC = (1 << 23), + IDE_AFLAG_IGNORE_DSC = (1 << 18), /* 0 When the tape position is unknown */ - IDE_AFLAG_ADDRESS_VALID = (1 << 24), + IDE_AFLAG_ADDRESS_VALID = (1 << 19), /* Device already opened */ - IDE_AFLAG_BUSY = (1 << 25), + IDE_AFLAG_BUSY = (1 << 20), /* Attempt to auto-detect the current user block size */ - IDE_AFLAG_DETECT_BS = (1 << 26), + IDE_AFLAG_DETECT_BS = (1 << 21), /* Currently on a filemark */ - IDE_AFLAG_FILEMARK = (1 << 27), + IDE_AFLAG_FILEMARK = (1 << 22), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 28), + IDE_AFLAG_MEDIUM_PRESENT = (1 << 23), - IDE_AFLAG_NO_AUTOCLOSE = (1 << 29), + IDE_AFLAG_NO_AUTOCLOSE = (1 << 24), }; /* device flags */ @@ -565,28 +559,26 @@ enum { IDE_DFLAG_NODMA = (1 << 16), /* powermanagment told us not to do anything, so sleep nicely */ IDE_DFLAG_BLOCKED = (1 << 17), - /* ide-scsi emulation */ - IDE_DFLAG_SCSI = (1 << 18), /* sleeping & sleep field valid */ - IDE_DFLAG_SLEEPING = (1 << 19), - IDE_DFLAG_POST_RESET = (1 << 20), - IDE_DFLAG_UDMA33_WARNED = (1 << 21), - IDE_DFLAG_LBA48 = (1 << 22), + IDE_DFLAG_SLEEPING = (1 << 18), + IDE_DFLAG_POST_RESET = (1 << 19), + IDE_DFLAG_UDMA33_WARNED = (1 << 20), + IDE_DFLAG_LBA48 = (1 << 21), /* status of write cache */ - IDE_DFLAG_WCACHE = (1 << 23), + IDE_DFLAG_WCACHE = (1 << 22), /* used for ignoring ATA_DF */ - IDE_DFLAG_NOWERR = (1 << 24), + IDE_DFLAG_NOWERR = (1 << 23), /* retrying in PIO */ - IDE_DFLAG_DMA_PIO_RETRY = (1 << 25), - IDE_DFLAG_LBA = (1 << 26), + IDE_DFLAG_DMA_PIO_RETRY = (1 << 24), + IDE_DFLAG_LBA = (1 << 25), /* don't unload heads */ - IDE_DFLAG_NO_UNLOAD = (1 << 27), + IDE_DFLAG_NO_UNLOAD = (1 << 26), /* heads unloaded, please don't reset port */ - IDE_DFLAG_PARKED = (1 << 28), - IDE_DFLAG_MEDIA_CHANGED = (1 << 29), + IDE_DFLAG_PARKED = (1 << 27), + IDE_DFLAG_MEDIA_CHANGED = (1 << 28), /* write protect */ - IDE_DFLAG_WP = (1 << 30), - IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 31), + IDE_DFLAG_WP = (1 << 29), + IDE_DFLAG_FORMAT_IN_PROGRESS = (1 << 30), }; struct ide_drive_s { @@ -610,8 +602,6 @@ struct ide_drive_s { unsigned long dev_flags; unsigned long sleep; /* sleep until this time */ - unsigned long service_start; /* time we started last request */ - unsigned long service_time; /* service time of last request */ unsigned long timeout; /* max time to wait for irq */ special_t special; /* special action flags */ @@ -879,8 +869,6 @@ typedef struct hwgroup_s { /* BOOL: protects all fields below */ volatile int busy; - /* BOOL: wake us up on timer expiry */ - unsigned int sleeping : 1; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; @@ -1258,14 +1246,11 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); -static inline unsigned long ide_scsi_get_timeout(struct ide_atapi_pc *pc) -{ - return max_t(unsigned long, WAIT_CMD, pc->timeout - jiffies); -} +int ide_cd_expiry(ide_drive_t *); -int ide_scsi_expiry(ide_drive_t *); +int ide_cd_get_xferlen(struct request *); -ide_startstop_t ide_issue_pc(ide_drive_t *, unsigned int, ide_expiry_t *); +ide_startstop_t ide_issue_pc(ide_drive_t *); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); @@ -1287,6 +1272,26 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); + +static inline int ide_lock_hwgroup(ide_hwgroup_t *hwgroup) +{ + if (hwgroup->busy) + return 1; + + hwgroup->busy = 1; + /* for atari only */ + ide_get_lock(ide_intr, hwgroup); + + return 0; +} + +static inline void ide_unlock_hwgroup(ide_hwgroup_t *hwgroup) +{ + /* for atari only */ + ide_release_lock(); + hwgroup->busy = 0; +} + extern void do_ide_request(struct request_queue *); void ide_init_disk(struct gendisk *, ide_drive_t *); @@ -1533,6 +1538,7 @@ void ide_unregister_region(struct gendisk *); void ide_undecoded_slave(ide_drive_t *); void ide_port_apply_params(ide_hwif_t *); +int ide_sysfs_register_port(ide_hwif_t *); struct ide_host *ide_host_alloc(const struct ide_port_info *, hw_regs_t **); void ide_host_free(struct ide_host *); @@ -1627,6 +1633,9 @@ extern struct mutex ide_cfg_mtx; #define local_irq_set(flags) do { local_save_flags((flags)); local_irq_enable_in_hardirq(); } while (0) +char *ide_media_string(ide_drive_t *); + +extern struct device_attribute ide_dev_attrs[]; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 959f5522d10a..2f3c2d4ef73b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -12,6 +12,7 @@ #include <net/net_namespace.h> extern struct files_struct init_files; +extern struct fs_struct init_fs; #define INIT_KIOCTX(name, which_mm) \ { \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index be3c484b5242..990355fbc54e 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -111,13 +111,13 @@ extern void enable_irq(unsigned int irq); extern cpumask_t irq_default_affinity; -extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); +extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ -static inline int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m) { return -EINVAL; } @@ -464,4 +464,10 @@ static inline void init_irq_proc(void) int show_interrupts(struct seq_file *p, void *v); +struct irq_desc; + +extern int early_irq_init(void); +extern int arch_early_irq_init(void); +extern int arch_init_chip_data(struct irq_desc *desc, int cpu); + #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 98564dc64476..f899b502f186 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -113,7 +113,8 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, cpumask_t dest); + void (*set_affinity)(unsigned int irq, + const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); @@ -193,42 +194,23 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -extern void early_irq_init(void); -extern void arch_early_irq_init(void); -extern void arch_init_chip_data(struct irq_desc *desc, int cpu); extern void arch_init_copy_chip_data(struct irq_desc *old_desc, struct irq_desc *desc, int cpu); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; - -static inline struct irq_desc *irq_to_desc(unsigned int irq) -{ - return (irq < NR_IRQS) ? irq_desc + irq : NULL; -} -static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) -{ - return irq_to_desc(irq); -} - -#else - -extern struct irq_desc *irq_to_desc(unsigned int irq); -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +#else /* CONFIG_SPARSE_IRQ */ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) - #define kstat_irqs_this_cpu(DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]) #define kstat_incr_irqs_this_cpu(irqno, DESC) \ ((DESC)->kstat_irqs[smp_processor_id()]++) -#endif +#endif /* CONFIG_SPARSE_IRQ */ + +extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); static inline struct irq_desc * irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 95d2b74641f5..5504a5c97836 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -15,20 +15,23 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) -#else +#else /* CONFIG_GENERIC_HARDIRQS */ extern int nr_irqs; +extern struct irq_desc *irq_to_desc(unsigned int irq); -#ifndef CONFIG_SPARSE_IRQ +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ + irq++, desc = irq_to_desc(irq)) \ + if (desc) -struct irq_desc; -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) -#endif -#endif + +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ + irq--, desc = irq_to_desc(irq)) \ + if (desc) + +#endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ for (irq = 0; irq < nr_irqs; irq++) diff --git a/include/linux/istallion.h b/include/linux/istallion.h index 0d1840723249..7faca98c7d14 100644 --- a/include/linux/istallion.h +++ b/include/linux/istallion.h @@ -59,9 +59,7 @@ struct stliport { unsigned int devnr; int baud_base; int custom_divisor; - int close_delay; int closing_wait; - int openwaitcnt; int rc; int argsize; void *argp; diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4ee4b3d2316f..570d20413119 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,10 +79,13 @@ static inline unsigned int kstat_irqs(unsigned int irq) } extern unsigned long long task_delta_exec(struct task_struct *); -extern void account_user_time(struct task_struct *, cputime_t); -extern void account_user_time_scaled(struct task_struct *, cputime_t); -extern void account_system_time(struct task_struct *, int, cputime_t); -extern void account_system_time_scaled(struct task_struct *, cputime_t); -extern void account_steal_time(struct task_struct *, cputime_t); +extern void account_user_time(struct task_struct *, cputime_t, cputime_t); +extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); +extern void account_steal_time(cputime_t); +extern void account_idle_time(cputime_t); + +extern void account_process_tick(struct task_struct *, int user); +extern void account_steal_ticks(unsigned long ticks); +extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f18b86fa8655..35525ac63337 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -83,6 +83,7 @@ struct kvm_irqchip { #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 #define KVM_EXIT_DCR 15 +#define KVM_EXIT_NMI 16 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { @@ -387,6 +388,14 @@ struct kvm_trace_rec { #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 +#if defined(CONFIG_X86) +#define KVM_CAP_DEVICE_MSI 20 +#endif +/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 +#if defined(CONFIG_X86) +#define KVM_CAP_USER_NMI 22 +#endif /* * ioctls for VM fds @@ -458,6 +467,8 @@ struct kvm_trace_rec { #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +/* Available with KVM_CAP_NMI */ +#define KVM_NMI _IO(KVMIO, 0x9a) #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) @@ -500,10 +511,17 @@ struct kvm_assigned_irq { __u32 guest_irq; __u32 flags; union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; __u32 reserved[12]; }; }; #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bb92be2153bc..eafabd5c66b2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/preempt.h> #include <linux/marker.h> +#include <linux/msi.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -306,8 +307,14 @@ struct kvm_assigned_dev_kernel { int host_busnr; int host_devfn; int host_irq; + bool host_irq_disabled; int guest_irq; - int irq_requested; + struct msi_msg guest_msi; +#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) +#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) +#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) +#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) + unsigned long irq_requested_type; int irq_source_id; struct pci_dev *dev; struct kvm *kvm; @@ -316,8 +323,7 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian); +void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); diff --git a/include/linux/namei.h b/include/linux/namei.h index 99eb80306dc5..fc2e03579877 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -94,4 +94,9 @@ static inline char *nd_get_link(struct nameidata *nd) return nd->saved_names[nd->depth]; } +static inline void nd_terminate_link(void *name, size_t len, size_t maxlen) +{ + ((char *) name)[min(len, maxlen)] = '\0'; +} + #endif /* _LINUX_NAMEI_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b6e694454280..218c73b1e6d4 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1766,6 +1766,7 @@ #define PCI_DEVICE_ID_SIIG_8S_20x_650 0x2081 #define PCI_DEVICE_ID_SIIG_8S_20x_850 0x2082 #define PCI_SUBDEVICE_ID_SIIG_QUARTET_SERIAL 0x2050 +#define PCI_SUBDEVICE_ID_SIIG_DUAL_SERIAL 0x2530 #define PCI_VENDOR_ID_RADISYS 0x1331 @@ -1795,6 +1796,7 @@ #define PCI_DEVICE_ID_SEALEVEL_UCOMM232 0x7202 #define PCI_DEVICE_ID_SEALEVEL_COMM4 0x7401 #define PCI_DEVICE_ID_SEALEVEL_COMM8 0x7801 +#define PCI_DEVICE_ID_SEALEVEL_7803 0x7803 #define PCI_DEVICE_ID_SEALEVEL_UCOMM8 0x7804 #define PCI_VENDOR_ID_HYPERCOPE 0x1365 diff --git a/include/linux/sched.h b/include/linux/sched.h index 8395e715809d..38a3f4b15394 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(void); extern void task_rq_unlock_wait(struct task_struct *p); -extern cpumask_t nohz_cpu_mask; +extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); #else @@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout); extern void cpu_init (void); extern void trap_init(void); -extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); @@ -758,20 +757,51 @@ enum cpu_idle_type { #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ -#define BALANCE_FOR_MC_POWER \ - (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) +enum powersavings_balance_level { + POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */ + POWERSAVINGS_BALANCE_BASIC, /* Fill one thread/core/package + * first for long running threads + */ + POWERSAVINGS_BALANCE_WAKEUP, /* Also bias task wakeups to semi-idle + * cpu package for power savings + */ + MAX_POWERSAVINGS_BALANCE_LEVELS +}; -#define BALANCE_FOR_PKG_POWER \ - ((sched_mc_power_savings || sched_smt_power_savings) ? \ - SD_POWERSAVINGS_BALANCE : 0) +extern int sched_mc_power_savings, sched_smt_power_savings; -#define test_sd_parent(sd, flag) ((sd->parent && \ - (sd->parent->flags & flag)) ? 1 : 0) +static inline int sd_balance_for_mc_power(void) +{ + if (sched_smt_power_savings) + return SD_POWERSAVINGS_BALANCE; + return 0; +} + +static inline int sd_balance_for_package_power(void) +{ + if (sched_mc_power_savings | sched_smt_power_savings) + return SD_POWERSAVINGS_BALANCE; + + return 0; +} + +/* + * Optimise SD flags for power savings: + * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings. + * Keep default SD flags if sched_{smt,mc}_power_saving=0 + */ + +static inline int sd_power_saving_flags(void) +{ + if (sched_mc_power_savings | sched_smt_power_savings) + return SD_BALANCE_NEWIDLE; + + return 0; +} struct sched_group { struct sched_group *next; /* Must be a circular list */ - cpumask_t cpumask; /* * CPU power of this group, SCHED_LOAD_SCALE being max power for a @@ -784,8 +814,15 @@ struct sched_group { * (see include/linux/reciprocal_div.h) */ u32 reciprocal_cpu_power; + + unsigned long cpumask[]; }; +static inline struct cpumask *sched_group_cpus(struct sched_group *sg) +{ + return to_cpumask(sg->cpumask); +} + enum sched_domain_level { SD_LV_NONE = 0, SD_LV_SIBLING, @@ -809,7 +846,6 @@ struct sched_domain { struct sched_domain *parent; /* top domain must be null terminated */ struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ - cpumask_t span; /* span of all CPUs in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ @@ -864,18 +900,35 @@ struct sched_domain { #ifdef CONFIG_SCHED_DEBUG char *name; #endif + + /* span of all CPUs in this domain */ + unsigned long span[]; }; -extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, +static inline struct cpumask *sched_domain_span(struct sched_domain *sd) +{ + return to_cpumask(sd->span); +} + +extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new); extern int arch_reinit_sched_domains(void); +/* Test a flag in parent sched domain */ +static inline int test_sd_parent(struct sched_domain *sd, int flag) +{ + if (sd->parent && (sd->parent->flags & flag)) + return 1; + + return 0; +} + #else /* CONFIG_SMP */ struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, cpumask_t *doms_new, +partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new) { } @@ -926,7 +979,7 @@ struct sched_class { void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const cpumask_t *newmask); + const struct cpumask *newmask); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); @@ -1579,12 +1632,12 @@ extern cputime_t task_gtime(struct task_struct *p); #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask); + const struct cpumask *new_mask); #else static inline int set_cpus_allowed_ptr(struct task_struct *p, - const cpumask_t *new_mask) + const struct cpumask *new_mask) { - if (!cpu_isset(0, *new_mask)) + if (!cpumask_test_cpu(0, new_mask)) return -EINVAL; return 0; } @@ -2195,10 +2248,8 @@ __trace_special(void *__tr, void *__data, } #endif -extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -extern long sched_getaffinity(pid_t pid, cpumask_t *mask); - -extern int sched_mc_power_savings, sched_smt_power_savings; +extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); +extern long sched_getaffinity(pid_t pid, struct cpumask *mask); extern void normalize_rt_tasks(void); diff --git a/include/linux/security.h b/include/linux/security.h index 3416cb85e77b..b92b5e453f64 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -335,17 +335,37 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dir contains the inode structure of the parent directory of the new link. * @new_dentry contains the dentry structure for the new link. * Return 0 if permission is granted. + * @path_link: + * Check permission before creating a new hard link to a file. + * @old_dentry contains the dentry structure for an existing link + * to the file. + * @new_dir contains the path structure of the parent directory of + * the new link. + * @new_dentry contains the dentry structure for the new link. + * Return 0 if permission is granted. * @inode_unlink: * Check the permission to remove a hard link to a file. * @dir contains the inode structure of parent directory of the file. * @dentry contains the dentry structure for file to be unlinked. * Return 0 if permission is granted. + * @path_unlink: + * Check the permission to remove a hard link to a file. + * @dir contains the path structure of parent directory of the file. + * @dentry contains the dentry structure for file to be unlinked. + * Return 0 if permission is granted. * @inode_symlink: * Check the permission to create a symbolic link to a file. * @dir contains the inode structure of parent directory of the symbolic link. * @dentry contains the dentry structure of the symbolic link. * @old_name contains the pathname of file. * Return 0 if permission is granted. + * @path_symlink: + * Check the permission to create a symbolic link to a file. + * @dir contains the path structure of parent directory of + * the symbolic link. + * @dentry contains the dentry structure of the symbolic link. + * @old_name contains the pathname of file. + * Return 0 if permission is granted. * @inode_mkdir: * Check permissions to create a new directory in the existing directory * associated with inode strcture @dir. @@ -353,11 +373,25 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dentry contains the dentry structure of new directory. * @mode contains the mode of new directory. * Return 0 if permission is granted. + * @path_mkdir: + * Check permissions to create a new directory in the existing directory + * associated with path strcture @path. + * @dir containst the path structure of parent of the directory + * to be created. + * @dentry contains the dentry structure of new directory. + * @mode contains the mode of new directory. + * Return 0 if permission is granted. * @inode_rmdir: * Check the permission to remove a directory. * @dir contains the inode structure of parent of the directory to be removed. * @dentry contains the dentry structure of directory to be removed. * Return 0 if permission is granted. + * @path_rmdir: + * Check the permission to remove a directory. + * @dir contains the path structure of parent of the directory to be + * removed. + * @dentry contains the dentry structure of directory to be removed. + * Return 0 if permission is granted. * @inode_mknod: * Check permissions when creating a special file (or a socket or a fifo * file created via the mknod system call). Note that if mknod operation @@ -368,6 +402,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @mode contains the mode of the new file. * @dev contains the device number. * Return 0 if permission is granted. + * @path_mknod: + * Check permissions when creating a file. Note that this hook is called + * even if mknod operation is being done for a regular file. + * @dir contains the path structure of parent of the new file. + * @dentry contains the dentry structure of the new file. + * @mode contains the mode of the new file. + * @dev contains the undecoded device number. Use new_decode_dev() to get + * the decoded device number. + * Return 0 if permission is granted. * @inode_rename: * Check for permission to rename a file or directory. * @old_dir contains the inode structure for parent of the old link. @@ -375,6 +418,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the inode structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_rename: + * Check for permission to rename a file or directory. + * @old_dir contains the path structure for parent of the old link. + * @old_dentry contains the dentry structure of the old link. + * @new_dir contains the path structure for parent of the new link. + * @new_dentry contains the dentry structure of the new link. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -403,6 +453,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @dentry contains the dentry structure for the file. * @attr is the iattr structure containing the new file attributes. * Return 0 if permission is granted. + * @path_truncate: + * Check permission before truncating a file. + * @path contains the path structure for the file. + * @length is the new length of the file. + * @time_attrs is the flags passed to do_truncate(). + * Return 0 if permission is granted. * @inode_getattr: * Check permission before obtaining file attributes. * @mnt is the vfsmount where the dentry was looked up @@ -1331,6 +1387,22 @@ struct security_operations { struct super_block *newsb); int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); +#ifdef CONFIG_SECURITY_PATH + int (*path_unlink) (struct path *dir, struct dentry *dentry); + int (*path_mkdir) (struct path *dir, struct dentry *dentry, int mode); + int (*path_rmdir) (struct path *dir, struct dentry *dentry); + int (*path_mknod) (struct path *dir, struct dentry *dentry, int mode, + unsigned int dev); + int (*path_truncate) (struct path *path, loff_t length, + unsigned int time_attrs); + int (*path_symlink) (struct path *dir, struct dentry *dentry, + const char *old_name); + int (*path_link) (struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); + int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry); +#endif + int (*inode_alloc_security) (struct inode *inode); void (*inode_free_security) (struct inode *inode); int (*inode_init_security) (struct inode *inode, struct inode *dir, @@ -2705,6 +2777,71 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #endif /* CONFIG_SECURITY_NETWORK_XFRM */ +#ifdef CONFIG_SECURITY_PATH +int security_path_unlink(struct path *dir, struct dentry *dentry); +int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode); +int security_path_rmdir(struct path *dir, struct dentry *dentry); +int security_path_mknod(struct path *dir, struct dentry *dentry, int mode, + unsigned int dev); +int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs); +int security_path_symlink(struct path *dir, struct dentry *dentry, + const char *old_name); +int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry); +int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry); +#else /* CONFIG_SECURITY_PATH */ +static inline int security_path_unlink(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static inline int security_path_mkdir(struct path *dir, struct dentry *dentry, + int mode) +{ + return 0; +} + +static inline int security_path_rmdir(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static inline int security_path_mknod(struct path *dir, struct dentry *dentry, + int mode, unsigned int dev) +{ + return 0; +} + +static inline int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs) +{ + return 0; +} + +static inline int security_path_symlink(struct path *dir, struct dentry *dentry, + const char *old_name) +{ + return 0; +} + +static inline int security_path_link(struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry) +{ + return 0; +} + +static inline int security_path_rename(struct path *old_dir, + struct dentry *old_dentry, + struct path *new_dir, + struct dentry *new_dentry) +{ + return 0; +} +#endif /* CONFIG_SECURITY_PATH */ + #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ea8d9265bf6..9136cc5608c3 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -10,8 +10,9 @@ #ifndef _LINUX_SERIAL_H #define _LINUX_SERIAL_H -#ifdef __KERNEL__ #include <linux/types.h> + +#ifdef __KERNEL__ #include <asm/page.h> /* diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 3d37c94abbc8..d4d2a78ad43e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -28,6 +28,9 @@ struct plat_serial8250_port { unsigned char iotype; /* UPIO_* */ unsigned char hub6; upf_t flags; /* UPF_* flags */ + unsigned int type; /* If UPF_FIXED_TYPE */ + unsigned int (*serial_in)(struct uart_port *, int); + void (*serial_out)(struct uart_port *, int, int); }; /* diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index feb3b939ec4b..b4199841f1fc 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -40,7 +40,8 @@ #define PORT_NS16550A 14 #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ -#define PORT_MAX_8250 16 /* max port ID */ +#define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ +#define PORT_MAX_8250 17 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed @@ -248,6 +249,8 @@ struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ + unsigned int (*serial_in)(struct uart_port *, int); + void (*serial_out)(struct uart_port *, int, int); unsigned int irq; /* irq number */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ @@ -293,6 +296,8 @@ struct uart_port { #define UPF_MAGIC_MULTIPLIER ((__force upf_t) (1 << 16)) #define UPF_CONS_FLOW ((__force upf_t) (1 << 23)) #define UPF_SHARE_IRQ ((__force upf_t) (1 << 24)) +/* The exact UART type is known and should not be probed. */ +#define UPF_FIXED_TYPE ((__force upf_t) (1 << 27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) (1 << 28)) #define UPF_FIXED_PORT ((__force upf_t) (1 << 29)) #define UPF_DEAD ((__force upf_t) (1 << 30)) @@ -316,35 +321,13 @@ struct uart_port { }; /* - * This is the state information which is persistent across opens. - * The low level driver must not to touch any elements contained - * within. - */ -struct uart_state { - unsigned int close_delay; /* msec */ - unsigned int closing_wait; /* msec */ - -#define USF_CLOSING_WAIT_INF (0) -#define USF_CLOSING_WAIT_NONE (~0U) - - int count; - int pm_state; - struct uart_info *info; - struct uart_port *port; - - struct mutex mutex; -}; - -#define UART_XMIT_SIZE PAGE_SIZE - -typedef unsigned int __bitwise__ uif_t; - -/* * This is the state information which is only valid when the port - * is open; it may be freed by the core driver once the device has + * is open; it may be cleared the core driver once the device has * been closed. Either the low level driver or the core can modify * stuff here. */ +typedef unsigned int __bitwise__ uif_t; + struct uart_info { struct tty_port port; struct circ_buf xmit; @@ -366,6 +349,29 @@ struct uart_info { wait_queue_head_t delta_msr_wait; }; +/* + * This is the state information which is persistent across opens. + * The low level driver must not to touch any elements contained + * within. + */ +struct uart_state { + unsigned int close_delay; /* msec */ + unsigned int closing_wait; /* msec */ + +#define USF_CLOSING_WAIT_INF (0) +#define USF_CLOSING_WAIT_NONE (~0U) + + int count; + int pm_state; + struct uart_info info; + struct uart_port *port; + + struct mutex mutex; +}; + +#define UART_XMIT_SIZE PAGE_SIZE + + /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 @@ -439,8 +445,13 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port); #define uart_circ_chars_free(circ) \ (CIRC_SPACE((circ)->head, (circ)->tail, UART_XMIT_SIZE)) -#define uart_tx_stopped(portp) \ - ((portp)->info->port.tty->stopped || (portp)->info->port.tty->hw_stopped) +static inline int uart_tx_stopped(struct uart_port *port) +{ + struct tty_struct *tty = port->info->port.tty; + if(tty->stopped || tty->hw_stopped) + return 1; + return 0; +} /* * The following are helper functions for the low level drivers. @@ -451,7 +462,7 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) #ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, port->info ? port->info->port.tty : NULL); + handle_sysrq(ch, port->info->port.tty); port->sysrq = 0; return 1; } diff --git a/include/linux/topology.h b/include/linux/topology.h index 0c5b5ac36d8e..e632d29f0544 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -125,7 +125,8 @@ int arch_update_cpu_topology(void); | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ | SD_SHARE_PKG_RESOURCES\ - | BALANCE_FOR_MC_POWER, \ + | sd_balance_for_mc_power()\ + | sd_power_saving_flags(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -150,7 +151,8 @@ int arch_update_cpu_topology(void); | SD_BALANCE_FORK \ | SD_WAKE_AFFINE \ | SD_WAKE_BALANCE \ - | BALANCE_FOR_PKG_POWER,\ + | sd_balance_for_package_power()\ + | sd_power_saving_flags(),\ .last_balance = jiffies, \ .balance_interval = 1, \ } diff --git a/include/linux/tty.h b/include/linux/tty.h index 3f4954c55e53..fc39db95499f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -180,8 +180,17 @@ struct signal_struct; * until a hangup so don't use the wrong path. */ +struct tty_port; + +struct tty_port_operations { + /* Return 1 if the carrier is raised */ + int (*carrier_raised)(struct tty_port *port); + void (*raise_dtr_rts)(struct tty_port *port); +}; + struct tty_port { struct tty_struct *tty; /* Back pointer */ + const struct tty_port_operations *ops; /* Port operations */ spinlock_t lock; /* Lock protecting tty field */ int blocked_open; /* Waiting to open */ int count; /* Usage count */ @@ -253,6 +262,7 @@ struct tty_struct { unsigned int column; unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; unsigned char closing:1; + unsigned char echo_overrun:1; unsigned short minimum_to_wake; unsigned long overrun_time; int num_overrun; @@ -262,11 +272,16 @@ struct tty_struct { int read_tail; int read_cnt; unsigned long read_flags[N_TTY_BUF_SIZE/(8*sizeof(unsigned long))]; + unsigned char *echo_buf; + unsigned int echo_pos; + unsigned int echo_cnt; int canon_data; unsigned long canon_head; unsigned int canon_column; struct mutex atomic_read_lock; struct mutex atomic_write_lock; + struct mutex output_lock; + struct mutex echo_lock; unsigned char *write_buf; int write_cnt; spinlock_t read_lock; @@ -295,6 +310,7 @@ struct tty_struct { #define TTY_PUSH 6 /* n_tty private */ #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ +#define TTY_LDISC_CHANGING 10 /* Line discipline changing */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -354,8 +370,7 @@ extern int tty_write_room(struct tty_struct *tty); extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); -extern int tty_do_resize(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws); +extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); @@ -421,6 +436,14 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); +extern int tty_port_carrier_raised(struct tty_port *port); +extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_hangup(struct tty_port *port); +extern int tty_port_block_til_ready(struct tty_port *port, + struct tty_struct *tty, struct file *filp); +extern int tty_port_close_start(struct tty_port *port, + struct tty_struct *tty, struct file *filp); +extern void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); extern int tty_register_ldisc(int disc, struct tty_ldisc_ops *new_ldisc); extern int tty_unregister_ldisc(int disc); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 78416b901589..08e088334dba 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -196,8 +196,7 @@ * Optional: If not provided then the write method is called under * the atomic write lock to keep it serialized with the ldisc. * - * int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty, - * unsigned int rows, unsigned int cols); + * int (*resize)(struct tty_struct *tty, struct winsize *ws) * * Called when a termios request is issued which changes the * requested terminal geometry. @@ -258,8 +257,7 @@ struct tty_operations { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); - int (*resize)(struct tty_struct *tty, struct tty_struct *real_tty, - struct winsize *ws); + int (*resize)(struct tty_struct *tty, struct winsize *ws); int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct tty_driver *driver, int line, char *options); diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h index a102561e7026..fb7c359bdfba 100644 --- a/include/linux/usb/wusb-wa.h +++ b/include/linux/usb/wusb-wa.h @@ -51,6 +51,7 @@ enum { WUSB_REQ_GET_TIME = 25, WUSB_REQ_SET_STREAM_IDX = 26, WUSB_REQ_SET_WUSB_MAS = 27, + WUSB_REQ_CHAN_STOP = 28, }; diff --git a/include/linux/uwb.h b/include/linux/uwb.h index f9ccbd9a2ced..c02128991ff7 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -30,6 +30,7 @@ #include <linux/device.h> #include <linux/mutex.h> #include <linux/timer.h> +#include <linux/wait.h> #include <linux/workqueue.h> #include <linux/uwb/spec.h> @@ -66,6 +67,7 @@ struct uwb_dev { struct uwb_dev_addr dev_addr; int beacon_slot; DECLARE_BITMAP(streams, UWB_NUM_STREAMS); + DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS); }; #define to_uwb_dev(d) container_of(d, struct uwb_dev, dev) @@ -86,12 +88,31 @@ struct uwb_notifs_chain { struct mutex mutex; }; +/* Beacon cache list */ +struct uwb_beca { + struct list_head list; + size_t entries; + struct mutex mutex; +}; + +/* Event handling thread. */ +struct uwbd { + int pid; + struct task_struct *task; + wait_queue_head_t wq; + struct list_head event_list; + spinlock_t event_list_lock; +}; + /** * struct uwb_mas_bm - a bitmap of all MAS in a superframe * @bm: a bitmap of length #UWB_NUM_MAS */ struct uwb_mas_bm { DECLARE_BITMAP(bm, UWB_NUM_MAS); + DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS); + int safe; + int unsafe; }; /** @@ -117,14 +138,24 @@ struct uwb_mas_bm { * FIXME: further target states TBD. */ enum uwb_rsv_state { - UWB_RSV_STATE_NONE, + UWB_RSV_STATE_NONE = 0, UWB_RSV_STATE_O_INITIATED, UWB_RSV_STATE_O_PENDING, UWB_RSV_STATE_O_MODIFIED, UWB_RSV_STATE_O_ESTABLISHED, + UWB_RSV_STATE_O_TO_BE_MOVED, + UWB_RSV_STATE_O_MOVE_EXPANDING, + UWB_RSV_STATE_O_MOVE_COMBINING, + UWB_RSV_STATE_O_MOVE_REDUCING, UWB_RSV_STATE_T_ACCEPTED, UWB_RSV_STATE_T_DENIED, + UWB_RSV_STATE_T_CONFLICT, UWB_RSV_STATE_T_PENDING, + UWB_RSV_STATE_T_EXPANDING_ACCEPTED, + UWB_RSV_STATE_T_EXPANDING_CONFLICT, + UWB_RSV_STATE_T_EXPANDING_PENDING, + UWB_RSV_STATE_T_EXPANDING_DENIED, + UWB_RSV_STATE_T_RESIZED, UWB_RSV_STATE_LAST, }; @@ -149,6 +180,12 @@ struct uwb_rsv_target { }; }; +struct uwb_rsv_move { + struct uwb_mas_bm final_mas; + struct uwb_ie_drp *companion_drp_ie; + struct uwb_mas_bm companion_mas; +}; + /* * Number of streams reserved for reservations targeted at DevAddrs. */ @@ -186,6 +223,7 @@ typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv); * * @status: negotiation status * @stream: stream index allocated for this reservation + * @tiebreaker: conflict tiebreaker for this reservation * @mas: reserved MAS * @drp_ie: the DRP IE * @ie_valid: true iff the DRP IE matches the reservation parameters @@ -201,25 +239,29 @@ struct uwb_rsv { struct uwb_rc *rc; struct list_head rc_node; struct list_head pal_node; + struct kref kref; struct uwb_dev *owner; struct uwb_rsv_target target; enum uwb_drp_type type; int max_mas; int min_mas; - int sparsity; + int max_interval; bool is_multicast; uwb_rsv_cb_f callback; void *pal_priv; enum uwb_rsv_state state; + bool needs_release_companion_mas; u8 stream; + u8 tiebreaker; struct uwb_mas_bm mas; struct uwb_ie_drp *drp_ie; + struct uwb_rsv_move mv; bool ie_valid; struct timer_list timer; - bool expired; + struct work_struct handle_timeout_work; }; static const @@ -261,6 +303,13 @@ struct uwb_drp_avail { bool ie_valid; }; +struct uwb_drp_backoff_win { + u8 window; + u8 n; + int total_expired; + struct timer_list timer; + bool can_reserve_extra_mases; +}; const char *uwb_rsv_state_str(enum uwb_rsv_state state); const char *uwb_rsv_type_str(enum uwb_drp_type type); @@ -276,6 +325,8 @@ void uwb_rsv_terminate(struct uwb_rsv *rsv); void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv); +void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas); + /** * Radio Control Interface instance * @@ -337,23 +388,33 @@ struct uwb_rc { u8 ctx_roll; int beaconing; /* Beaconing state [channel number] */ + int beaconing_forced; int scanning; enum uwb_scan_type scan_type:3; unsigned ready:1; struct uwb_notifs_chain notifs_chain; + struct uwb_beca uwb_beca; + + struct uwbd uwbd; + struct uwb_drp_backoff_win bow; struct uwb_drp_avail drp_avail; struct list_head reservations; + struct list_head cnflt_alien_list; + struct uwb_mas_bm cnflt_alien_bitmap; struct mutex rsvs_mutex; + spinlock_t rsvs_lock; struct workqueue_struct *rsv_workq; - struct work_struct rsv_update_work; + struct delayed_work rsv_update_work; + struct delayed_work rsv_alien_bp_work; + int set_drp_ie_pending; struct mutex ies_mutex; struct uwb_rc_cmd_set_ie *ies; size_t ies_capacity; - spinlock_t pal_lock; struct list_head pals; + int active_pals; struct uwb_dbg *dbg; }; @@ -361,11 +422,19 @@ struct uwb_rc { /** * struct uwb_pal - a UWB PAL - * @name: descriptive name for this PAL (wushc, wlp, etc.). + * @name: descriptive name for this PAL (wusbhc, wlp, etc.). * @device: a device for the PAL. Used to link the PAL and the radio * controller in sysfs. + * @rc: the radio controller the PAL uses. + * @channel_changed: called when the channel used by the radio changes. + * A channel of -1 means the channel has been stopped. * @new_rsv: called when a peer requests a reservation (may be NULL if * the PAL cannot accept reservation requests). + * @channel: channel being used by the PAL; 0 if the PAL isn't using + * the radio; -1 if the PAL wishes to use the radio but + * cannot. + * @debugfs_dir: a debugfs directory which the PAL can use for its own + * debugfs files. * * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). @@ -384,12 +453,21 @@ struct uwb_pal { struct list_head node; const char *name; struct device *device; - void (*new_rsv)(struct uwb_rsv *rsv); + struct uwb_rc *rc; + + void (*channel_changed)(struct uwb_pal *pal, int channel); + void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv); + + int channel; + struct dentry *debugfs_dir; }; void uwb_pal_init(struct uwb_pal *pal); -int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal); -void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal); +int uwb_pal_register(struct uwb_pal *pal); +void uwb_pal_unregister(struct uwb_pal *pal); + +int uwb_radio_start(struct uwb_pal *pal); +void uwb_radio_stop(struct uwb_pal *pal); /* * General public API @@ -443,8 +521,6 @@ ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name, struct uwb_rccb *cmd, size_t cmd_size, u8 expected_type, u16 expected_event, struct uwb_rceb **preply); -ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **); -int uwb_bg_joined(struct uwb_rc *rc); size_t __uwb_addr_print(char *, size_t, const unsigned char *, int); @@ -520,6 +596,8 @@ void uwb_rc_rm(struct uwb_rc *); void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); +void uwb_rc_pre_reset(struct uwb_rc *rc); +void uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? @@ -531,7 +609,9 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) } /** - * Events generated by UWB that can be passed to any listeners + * enum uwb_notifs - UWB events that can be passed to any listeners + * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group. + * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group. * * Higher layers can register callback functions with the radio * controller using uwb_notifs_register(). The radio controller @@ -539,8 +619,6 @@ static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) * nodes when an event occurs. */ enum uwb_notifs { - UWB_NOTIF_BG_JOIN = 0, /* radio controller joined a beacon group */ - UWB_NOTIF_BG_LEAVE = 1, /* radio controller left a beacon group */ UWB_NOTIF_ONAIR, UWB_NOTIF_OFFAIR, }; @@ -652,22 +730,9 @@ static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe) /* Information Element handling */ -/* For representing the state of writing to a buffer when iterating */ -struct uwb_buf_ctx { - char *buf; - size_t bytes, size; -}; - -typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *, - size_t, void *); struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); -ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data, - const void *buf, size_t size); -int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *, - size_t, void *); -int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *); -struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); - +int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size); +int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id); /* * Transmission statistics diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h index 1141f41bab5c..8da004e25628 100644 --- a/include/linux/uwb/debug-cmd.h +++ b/include/linux/uwb/debug-cmd.h @@ -32,6 +32,10 @@ enum uwb_dbg_cmd_type { UWB_DBG_CMD_RSV_ESTABLISH = 1, UWB_DBG_CMD_RSV_TERMINATE = 2, + UWB_DBG_CMD_IE_ADD = 3, + UWB_DBG_CMD_IE_RM = 4, + UWB_DBG_CMD_RADIO_START = 5, + UWB_DBG_CMD_RADIO_STOP = 6, }; struct uwb_dbg_cmd_rsv_establish { @@ -39,18 +43,25 @@ struct uwb_dbg_cmd_rsv_establish { __u8 type; __u16 max_mas; __u16 min_mas; - __u8 sparsity; + __u8 max_interval; }; struct uwb_dbg_cmd_rsv_terminate { int index; }; +struct uwb_dbg_cmd_ie { + __u8 data[128]; + int len; +}; + struct uwb_dbg_cmd { __u32 type; union { struct uwb_dbg_cmd_rsv_establish rsv_establish; struct uwb_dbg_cmd_rsv_terminate rsv_terminate; + struct uwb_dbg_cmd_ie ie_add; + struct uwb_dbg_cmd_ie ie_rm; }; }; diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h deleted file mode 100644 index a86a73fe303f..000000000000 --- a/include/linux/uwb/debug.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Ultra Wide Band - * Debug Support - * - * Copyright (C) 2005-2006 Intel Corporation - * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA. - * - * - * FIXME: doc - * Invoke like: - * - * #define D_LOCAL 4 - * #include <linux/uwb/debug.h> - * - * At the end of your include files. - */ -#include <linux/types.h> - -struct device; -extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize); - -/* Master debug switch; !0 enables, 0 disables */ -#define D_MASTER (!0) - -/* Local (per-file) debug switch; #define before #including */ -#ifndef D_LOCAL -#define D_LOCAL 0 -#endif - -#undef __d_printf -#undef d_fnstart -#undef d_fnend -#undef d_printf -#undef d_dump - -#define __d_printf(l, _tag, _dev, f, a...) \ -do { \ - struct device *__dev = (_dev); \ - if (D_MASTER && D_LOCAL >= (l)) { \ - char __head[64] = ""; \ - if (_dev != NULL) { \ - if ((unsigned long)__dev < 4096) \ - printk(KERN_ERR "E: Corrupt dev %p\n", \ - __dev); \ - else \ - snprintf(__head, sizeof(__head), \ - "%s %s: ", \ - dev_driver_string(__dev), \ - __dev->bus_id); \ - } \ - printk(KERN_ERR "%s%s" _tag ": " f, __head, \ - __func__, ## a); \ - } \ -} while (0 && _dev) - -#define d_fnstart(l, _dev, f, a...) \ - __d_printf(l, " FNSTART", _dev, f, ## a) -#define d_fnend(l, _dev, f, a...) \ - __d_printf(l, " FNEND", _dev, f, ## a) -#define d_printf(l, _dev, f, a...) \ - __d_printf(l, "", _dev, f, ## a) -#define d_dump(l, _dev, ptr, size) \ -do { \ - struct device *__dev = _dev; \ - if (D_MASTER && D_LOCAL >= (l)) \ - dump_bytes(__dev, ptr, size); \ -} while (0 && _dev) -#define d_test(l) (D_MASTER && D_LOCAL >= (l)) diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h index 198c15f8e251..b52e44f1bd33 100644 --- a/include/linux/uwb/spec.h +++ b/include/linux/uwb/spec.h @@ -59,6 +59,11 @@ enum { UWB_NUM_ZONES = 16 }; #define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES) /* + * Number of MAS required before a row can be considered available. + */ +#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1) + +/* * Number of streams per DRP reservation between a pair of devices. * * [ECMA-368] section 16.8.6. @@ -94,6 +99,26 @@ enum { UWB_BEACON_SLOT_LENGTH_US = 85 }; enum { UWB_MAX_LOST_BEACONS = 3 }; /* + * mDRPBackOffWinMin + * + * The minimum number of superframes to wait before trying to reserve + * extra MAS. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_DRP_BACKOFF_WIN_MIN = 2 }; + +/* + * mDRPBackOffWinMax + * + * The maximum number of superframes to wait before trying to reserve + * extra MAS. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_DRP_BACKOFF_WIN_MAX = 16 }; + +/* * Length of a superframe in microseconds. */ #define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS) @@ -200,6 +225,12 @@ enum uwb_drp_reason { UWB_DRP_REASON_MODIFIED, }; +/** Relinquish Request Reason Codes ([ECMA-368] table 113) */ +enum uwb_relinquish_req_reason { + UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0, + UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION, +}; + /** * DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9]) */ @@ -252,6 +283,7 @@ enum uwb_ie { UWB_APP_SPEC_PROBE_IE = 15, UWB_IDENTIFICATION_IE = 19, UWB_MASTER_KEY_ID_IE = 20, + UWB_RELINQUISH_REQUEST_IE = 21, UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */ UWB_APP_SPEC_IE = 255, }; @@ -365,6 +397,27 @@ struct uwb_ie_drp_avail { DECLARE_BITMAP(bmp, UWB_NUM_MAS); } __attribute__((packed)); +/* Relinqish Request IE ([ECMA-368] section 16.8.19). */ +struct uwb_relinquish_request_ie { + struct uwb_ie_hdr hdr; + __le16 relinquish_req_control; + struct uwb_dev_addr dev_addr; + struct uwb_drp_alloc allocs[]; +} __attribute__((packed)); + +static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie) +{ + return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf; +} + +static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie, + int reason_code) +{ + u16 ctrl = le16_to_cpu(ie->relinquish_req_control); + ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0); + ie->relinquish_req_control = cpu_to_le16(ctrl); +} + /** * The Vendor ID is set to an OUI that indicates the vendor of the device. * ECMA-368 [16.8.10] diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h index 36a39e34f8d7..4b4fc0f43855 100644 --- a/include/linux/uwb/umc.h +++ b/include/linux/uwb/umc.h @@ -89,6 +89,8 @@ struct umc_driver { void (*remove)(struct umc_dev *); int (*suspend)(struct umc_dev *, pm_message_t state); int (*resume)(struct umc_dev *); + int (*pre_reset)(struct umc_dev *); + int (*post_reset)(struct umc_dev *); struct device_driver driver; }; diff --git a/include/linux/wlp.h b/include/linux/wlp.h index 033545e145c7..ac95ce6606ac 100644 --- a/include/linux/wlp.h +++ b/include/linux/wlp.h @@ -646,6 +646,7 @@ struct wlp_wss { struct wlp { struct mutex mutex; struct uwb_rc *rc; /* UWB radio controller */ + struct net_device *ndev; struct uwb_pal pal; struct wlp_eda eda; struct wlp_uuid uuid; @@ -675,7 +676,7 @@ struct wlp_wss_attribute { static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode, \ _show, _store) -extern int wlp_setup(struct wlp *, struct uwb_rc *); +extern int wlp_setup(struct wlp *, struct uwb_rc *, struct net_device *ndev); extern void wlp_remove(struct wlp *); extern ssize_t wlp_neighborhood_show(struct wlp *, char *); extern int wlp_wss_setup(struct net_device *, struct wlp_wss *); diff --git a/init/Kconfig b/init/Kconfig index 13627191a60d..f6281711166d 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -924,6 +924,15 @@ config KMOD endif # MODULES +config INIT_ALL_POSSIBLE + bool + help + Back when each arch used to define their own cpu_online_map and + cpu_possible_map, some of them chose to initialize cpu_possible_map + with all 1s, and others with all 0s. When they were centralised, + it was better to provide this option than to break all the archs + and have several arch maintainers persuing me down dark alleys. + config STOP_MACHINE bool default y diff --git a/init/main.c b/init/main.c index 2a7ce0f8e453..ad8f9f53f8d1 100644 --- a/init/main.c +++ b/init/main.c @@ -75,15 +75,6 @@ #include <asm/smp.h> #endif -/* - * This is one of the first .c files built. Error out early if we have compiler - * trouble. - */ - -#if __GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 0 -#warning gcc-4.1.0 is known to miscompile the kernel. A different compiler version is recommended. -#endif - static int kernel_init(void *); extern void init_IRQ(void); @@ -540,15 +531,6 @@ void __init __weak thread_info_cache_init(void) { } -void __init __weak arch_early_irq_init(void) -{ -} - -void __init __weak early_irq_init(void) -{ - arch_early_irq_init(); -} - asmlinkage void __init start_kernel(void) { char * command_line; diff --git a/kernel/cpu.c b/kernel/cpu.c index 8ea32e8d68b0..bae131a1211b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -24,19 +24,20 @@ cpumask_t cpu_present_map __read_mostly; EXPORT_SYMBOL(cpu_present_map); -#ifndef CONFIG_SMP - /* * Represents all cpu's that are currently online. */ -cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); +#ifdef CONFIG_INIT_ALL_POSSIBLE cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; +#else +cpumask_t cpu_possible_map __read_mostly; +#endif EXPORT_SYMBOL(cpu_possible_map); -#else /* CONFIG_SMP */ - +#ifdef CONFIG_SMP /* Serializes the updates to cpu_online_map, cpu_present_map */ static DEFINE_MUTEX(cpu_add_remove_lock); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 96c0ba13b8cd..39c1a4c1c5a9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf) if (!*buf) { cpus_clear(trialcs.cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs.cpus_allowed); + retval = cpulist_parse(buf, &trialcs.cpus_allowed); if (retval < 0) return retval; @@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) mask = cs->cpus_allowed; mutex_unlock(&callback_mutex); - return cpulist_scnprintf(page, PAGE_SIZE, mask); + return cpulist_scnprintf(page, PAGE_SIZE, &mask); } static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index bda9cb924276..eb2bfefa6dcc 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -32,7 +32,6 @@ */ #include <linux/cpu.h> -#include <linux/irq.h> #include <linux/module.h> #include <linux/percpu.h> #include <linux/hrtimer.h> diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 650ce4102a63..cc0f7321b8ce 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -40,9 +40,6 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* @@ -71,9 +68,6 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; @@ -92,9 +86,6 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; @@ -133,9 +124,6 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; @@ -178,9 +166,6 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { - if (!desc) - continue; - spin_lock_irq(&desc->lock); status = desc->status; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6eb3c7952b64..f63c706d25e1 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -46,7 +46,7 @@ void dynamic_irq_init(unsigned int irq) desc->irq_count = 0; desc->irqs_unhandled = 0; #ifdef CONFIG_SMP - cpus_setall(desc->affinity); + cpumask_setall(&desc->affinity); #endif spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6492400cb50d..c20db0be9173 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -56,10 +56,6 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); -void __init __attribute__((weak)) arch_early_irq_init(void) -{ -} - #ifdef CONFIG_SPARSE_IRQ static struct irq_desc irq_desc_init = { .irq = -1, @@ -90,13 +86,11 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) desc->kstat_irqs = (unsigned int *)ptr; } -void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) -{ -} - static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); + + spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP desc->cpu = cpu; @@ -134,7 +128,7 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm /* FIXME: use bootmem alloc ...*/ static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; -void __init early_irq_init(void) +int __init early_irq_init(void) { struct irq_desc *desc; int legacy_count; @@ -146,6 +140,7 @@ void __init early_irq_init(void) for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy[i]; + lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); irq_desc_ptrs[i] = desc + i; } @@ -153,7 +148,7 @@ void __init early_irq_init(void) for (i = legacy_count; i < NR_IRQS; i++) irq_desc_ptrs[i] = NULL; - arch_early_irq_init(); + return arch_early_irq_init(); } struct irq_desc *irq_to_desc(unsigned int irq) @@ -203,7 +198,7 @@ out_unlock: return desc; } -#else +#else /* !CONFIG_SPARSE_IRQ */ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { [0 ... NR_IRQS-1] = { @@ -218,7 +213,31 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { } }; -#endif +int __init early_irq_init(void) +{ + struct irq_desc *desc; + int count; + int i; + + desc = irq_desc; + count = ARRAY_SIZE(irq_desc); + + for (i = 0; i < count; i++) + desc[i].irq = i; + + return arch_early_irq_init(); +} + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return (irq < NR_IRQS) ? irq_desc + irq : NULL; +} + +struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +{ + return irq_to_desc(irq); +} +#endif /* !CONFIG_SPARSE_IRQ */ /* * What should we do if we get a hw irq event on an illegal vector? @@ -428,9 +447,6 @@ void early_init_irq_lock_class(void) int i; for_each_irq_desc(i, desc) { - if (!desc) - continue; - lockdep_set_class(&desc->lock, &irq_desc_lock_class); } } @@ -439,7 +455,7 @@ void early_init_irq_lock_class(void) unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); - return desc->kstat_irqs[cpu]; + return desc ? desc->kstat_irqs[cpu] : 0; } #endif EXPORT_SYMBOL(kstat_irqs_cpu); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 540f6c49f3fa..61c4a9b62165 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -79,7 +79,7 @@ int irq_can_set_affinity(unsigned int irq) * @cpumask: cpumask * */ -int irq_set_affinity(unsigned int irq, cpumask_t cpumask) +int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -91,14 +91,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); } else { desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = cpumask; + cpumask_copy(&desc->pending_mask, cpumask); } #else - desc->affinity = cpumask; + cpumask_copy(&desc->affinity, cpumask); desc->chip->set_affinity(irq, cpumask); #endif desc->status |= IRQ_AFFINITY_SET; @@ -112,26 +112,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask) */ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) { - cpumask_t mask; - if (!irq_can_set_affinity(irq)) return 0; - cpus_and(mask, cpu_online_map, irq_default_affinity); - /* * Preserve an userspace affinity setup, but make sure that * one of the targets is online. */ if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { - if (cpus_intersects(desc->affinity, cpu_online_map)) - mask = desc->affinity; + if (cpumask_any_and(&desc->affinity, cpu_online_mask) + < nr_cpu_ids) + goto set_affinity; else desc->status &= ~IRQ_AFFINITY_SET; } - desc->affinity = mask; - desc->chip->set_affinity(irq, mask); + cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); +set_affinity: + desc->chip->set_affinity(irq, &desc->affinity); return 0; } diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 9db681d95814..bd72329e630c 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -4,7 +4,6 @@ void move_masked_irq(int irq) { struct irq_desc *desc = irq_to_desc(irq); - cpumask_t tmp; if (likely(!(desc->status & IRQ_MOVE_PENDING))) return; @@ -19,7 +18,7 @@ void move_masked_irq(int irq) desc->status &= ~IRQ_MOVE_PENDING; - if (unlikely(cpus_empty(desc->pending_mask))) + if (unlikely(cpumask_empty(&desc->pending_mask))) return; if (!desc->chip->set_affinity) @@ -27,8 +26,6 @@ void move_masked_irq(int irq) assert_spin_locked(&desc->lock); - cpus_and(tmp, desc->pending_mask, cpu_online_map); - /* * If there was a valid mask to work with, please * do the disable, re-program, enable sequence. @@ -41,10 +38,13 @@ void move_masked_irq(int irq) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(!cpus_empty(tmp))) { - desc->chip->set_affinity(irq,tmp); + if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) + < nr_cpu_ids)) { + cpumask_and(&desc->affinity, + &desc->pending_mask, cpu_online_mask); + desc->chip->set_affinity(irq, &desc->affinity); } - cpus_clear(desc->pending_mask); + cpumask_clear(&desc->pending_mask); } void move_native_irq(int irq) diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 089c3746358a..ecf765c6a77a 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -42,6 +42,7 @@ static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); + spin_lock_init(&desc->lock); desc->cpu = cpu; lockdep_set_class(&desc->lock, &irq_desc_lock_class); init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); @@ -74,10 +75,8 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", - irq, cpu, node); if (!desc) { - printk(KERN_ERR "can not get new irq_desc for moving\n"); + printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq); /* still use old one */ desc = old_desc; goto out_unlock; @@ -106,8 +105,6 @@ struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) return desc; old_cpu = desc->cpu; - printk(KERN_DEBUG - "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); if (old_cpu != cpu) { node = cpu_to_node(cpu); old_node = cpu_to_node(old_cpu); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index f6b3440f05bc..d2c0e5ee53c5 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data; - cpumask_t new_value; + cpumask_var_t new_value; int err; if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity || irq_balancing_disabled(irq)) return -EIO; + if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + err = cpumask_parse_user(buffer, count, new_value); if (err) - return err; + goto free_cpumask; - if (!is_affinity_mask_valid(new_value)) - return -EINVAL; + if (!is_affinity_mask_valid(*new_value)) { + err = -EINVAL; + goto free_cpumask; + } /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpus_intersects(new_value, cpu_online_map)) + if (!cpumask_intersects(new_value, cpu_online_mask)) { /* Special case for empty set - allow the architecture code to set default SMP affinity. */ - return irq_select_affinity_usr(irq) ? -EINVAL : count; - - irq_set_affinity(irq, new_value); + err = irq_select_affinity_usr(irq) ? -EINVAL : count; + } else { + irq_set_affinity(irq, new_value); + err = count; + } - return count; +free_cpumask: + free_cpumask_var(new_value); + return err; } static int irq_affinity_proc_open(struct inode *inode, struct file *file) @@ -95,7 +104,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 3738107531fd..dd364c11e56e 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -91,9 +91,6 @@ static int misrouted_irq(int irq) int i, ok = 0; for_each_irq_desc(i, desc) { - if (!desc) - continue; - if (!i) continue; @@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy) for_each_irq_desc(i, desc) { unsigned int status; - if (!desc) - continue; if (!i) continue; diff --git a/kernel/profile.c b/kernel/profile.c index 60adefb59b5e..4cb7d68fed82 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -442,7 +442,7 @@ void profile_tick(int type) static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, (cpumask_t *)data); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file, unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index e503a002f330..c03ca3e61919 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp) * unnecessarily. */ smp_mb(); - cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); + cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask); rcp->signaled = 0; } diff --git a/kernel/sched.c b/kernel/sched.c index fff1c4a20b65..930bf2e6d714 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -498,18 +498,26 @@ struct rt_rq { */ struct root_domain { atomic_t refcount; - cpumask_t span; - cpumask_t online; + cpumask_var_t span; + cpumask_var_t online; /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. */ - cpumask_t rto_mask; + cpumask_var_t rto_mask; atomic_t rto_count; #ifdef CONFIG_SMP struct cpupri cpupri; #endif +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + /* + * Preferred wake up cpu nominated by sched_mc balance that will be + * used when most cpus are idle in the system indicating overall very + * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) + */ + unsigned int sched_mc_preferred_wakeup_cpu; +#endif }; /* @@ -1514,7 +1522,7 @@ static int tg_shares_up(struct task_group *tg, void *data) struct sched_domain *sd = data; int i; - for_each_cpu_mask(i, sd->span) { + for_each_cpu(i, sched_domain_span(sd)) { /* * If there are currently no tasks on the cpu pretend there * is one of average load so that when a new task gets to @@ -1535,7 +1543,7 @@ static int tg_shares_up(struct task_group *tg, void *data) if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) shares = tg->shares; - for_each_cpu_mask(i, sd->span) + for_each_cpu(i, sched_domain_span(sd)) update_group_shares_cpu(tg, i, shares, rq_weight); return 0; @@ -2101,15 +2109,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int i; /* Skip over this group if it has no CPUs allowed */ - if (!cpus_intersects(group->cpumask, p->cpus_allowed)) + if (!cpumask_intersects(sched_group_cpus(group), + &p->cpus_allowed)) continue; - local_group = cpu_isset(this_cpu, group->cpumask); + local_group = cpumask_test_cpu(this_cpu, + sched_group_cpus(group)); /* Tally up the load of all CPUs in the group */ avg_load = 0; - for_each_cpu_mask_nr(i, group->cpumask) { + for_each_cpu(i, sched_group_cpus(group)) { /* Bias balancing toward cpus of our domain */ if (local_group) load = source_load(i, load_idx); @@ -2141,17 +2151,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) * find_idlest_cpu - find the idlest cpu among the cpus in group. */ static int -find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, - cpumask_t *tmp) +find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) { unsigned long load, min_load = ULONG_MAX; int idlest = -1; int i; /* Traverse only the allowed CPUs */ - cpus_and(*tmp, group->cpumask, p->cpus_allowed); - - for_each_cpu_mask_nr(i, *tmp) { + for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { load = weighted_cpuload(i); if (load < min_load || (load == min_load && i == this_cpu)) { @@ -2193,7 +2200,6 @@ static int sched_balance_self(int cpu, int flag) update_shares(sd); while (sd) { - cpumask_t span, tmpmask; struct sched_group *group; int new_cpu, weight; @@ -2202,14 +2208,13 @@ static int sched_balance_self(int cpu, int flag) continue; } - span = sd->span; group = find_idlest_group(sd, t, cpu); if (!group) { sd = sd->child; continue; } - new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); + new_cpu = find_idlest_cpu(group, t, cpu); if (new_cpu == -1 || new_cpu == cpu) { /* Now try balancing at a lower domain level of cpu */ sd = sd->child; @@ -2218,10 +2223,10 @@ static int sched_balance_self(int cpu, int flag) /* Now try balancing at a lower domain level of new_cpu */ cpu = new_cpu; + weight = cpumask_weight(sched_domain_span(sd)); sd = NULL; - weight = cpus_weight(span); for_each_domain(cpu, tmp) { - if (weight <= cpus_weight(tmp->span)) + if (weight <= cpumask_weight(sched_domain_span(tmp))) break; if (tmp->flags & flag) sd = tmp; @@ -2266,7 +2271,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) cpu = task_cpu(p); for_each_domain(this_cpu, sd) { - if (cpu_isset(cpu, sd->span)) { + if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { update_shares(sd); break; } @@ -2315,7 +2320,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) else { struct sched_domain *sd; for_each_domain(this_cpu, sd) { - if (cpu_isset(cpu, sd->span)) { + if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { schedstat_inc(sd, ttwu_wake_remote); break; } @@ -2846,7 +2851,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) struct rq *rq; rq = task_rq_lock(p, &flags); - if (!cpu_isset(dest_cpu, p->cpus_allowed) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) || unlikely(!cpu_active(dest_cpu))) goto out; @@ -2911,7 +2916,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. */ - if (!cpu_isset(this_cpu, p->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) { schedstat_inc(p, se.nr_failed_migrations_affine); return 0; } @@ -3086,7 +3091,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum cpu_idle_type idle, - int *sd_idle, const cpumask_t *cpus, int *balance) + int *sd_idle, const struct cpumask *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -3122,10 +3127,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long sum_avg_load_per_task; unsigned long avg_load_per_task; - local_group = cpu_isset(this_cpu, group->cpumask); + local_group = cpumask_test_cpu(this_cpu, + sched_group_cpus(group)); if (local_group) - balance_cpu = first_cpu(group->cpumask); + balance_cpu = cpumask_first(sched_group_cpus(group)); /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -3134,13 +3140,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, max_cpu_load = 0; min_cpu_load = ~0UL; - for_each_cpu_mask_nr(i, group->cpumask) { - struct rq *rq; - - if (!cpu_isset(i, *cpus)) - continue; - - rq = cpu_rq(i); + for_each_cpu_and(i, sched_group_cpus(group), cpus) { + struct rq *rq = cpu_rq(i); if (*sd_idle && rq->nr_running) *sd_idle = 0; @@ -3251,8 +3252,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, */ if ((sum_nr_running < min_nr_running) || (sum_nr_running == min_nr_running && - first_cpu(group->cpumask) < - first_cpu(group_min->cpumask))) { + cpumask_first(sched_group_cpus(group)) > + cpumask_first(sched_group_cpus(group_min)))) { group_min = group; min_nr_running = sum_nr_running; min_load_per_task = sum_weighted_load / @@ -3267,8 +3268,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, if (sum_nr_running <= group_capacity - 1) { if (sum_nr_running > leader_nr_running || (sum_nr_running == leader_nr_running && - first_cpu(group->cpumask) > - first_cpu(group_leader->cpumask))) { + cpumask_first(sched_group_cpus(group)) < + cpumask_first(sched_group_cpus(group_leader)))) { group_leader = group; leader_nr_running = sum_nr_running; } @@ -3394,6 +3395,10 @@ out_balanced: if (this == group_leader && group_leader != group_min) { *imbalance = min_load_per_task; + if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { + cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = + cpumask_first(sched_group_cpus(group_leader)); + } return group_min; } #endif @@ -3407,16 +3412,16 @@ ret: */ static struct rq * find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, - unsigned long imbalance, const cpumask_t *cpus) + unsigned long imbalance, const struct cpumask *cpus) { struct rq *busiest = NULL, *rq; unsigned long max_load = 0; int i; - for_each_cpu_mask_nr(i, group->cpumask) { + for_each_cpu(i, sched_group_cpus(group)) { unsigned long wl; - if (!cpu_isset(i, *cpus)) + if (!cpumask_test_cpu(i, cpus)) continue; rq = cpu_rq(i); @@ -3446,7 +3451,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, - int *balance, cpumask_t *cpus) + int *balance, struct cpumask *cpus) { int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -3454,7 +3459,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct rq *busiest; unsigned long flags; - cpus_setall(*cpus); + cpumask_setall(cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3514,8 +3519,8 @@ redo: /* All tasks on this runqueue were pinned by CPU affinity */ if (unlikely(all_pinned)) { - cpu_clear(cpu_of(busiest), *cpus); - if (!cpus_empty(*cpus)) + cpumask_clear_cpu(cpu_of(busiest), cpus); + if (!cpumask_empty(cpus)) goto redo; goto out_balanced; } @@ -3532,7 +3537,8 @@ redo: /* don't kick the migration_thread, if the curr * task on busiest cpu can't be moved to this_cpu */ - if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { + if (!cpumask_test_cpu(this_cpu, + &busiest->curr->cpus_allowed)) { spin_unlock_irqrestore(&busiest->lock, flags); all_pinned = 1; goto out_one_pinned; @@ -3607,7 +3613,7 @@ out: */ static int load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, - cpumask_t *cpus) + struct cpumask *cpus) { struct sched_group *group; struct rq *busiest = NULL; @@ -3616,7 +3622,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, int sd_idle = 0; int all_pinned = 0; - cpus_setall(*cpus); + cpumask_setall(cpus); /* * When power savings policy is enabled for the parent domain, idle @@ -3660,17 +3666,71 @@ redo: double_unlock_balance(this_rq, busiest); if (unlikely(all_pinned)) { - cpu_clear(cpu_of(busiest), *cpus); - if (!cpus_empty(*cpus)) + cpumask_clear_cpu(cpu_of(busiest), cpus); + if (!cpumask_empty(cpus)) goto redo; } } if (!ld_moved) { + int active_balance = 0; + schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; + + if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) + return -1; + + if (sd->nr_balance_failed++ < 2) + return -1; + + /* + * The only task running in a non-idle cpu can be moved to this + * cpu in an attempt to completely freeup the other CPU + * package. The same method used to move task in load_balance() + * have been extended for load_balance_newidle() to speedup + * consolidation at sched_mc=POWERSAVINGS_BALANCE_WAKEUP (2) + * + * The package power saving logic comes from + * find_busiest_group(). If there are no imbalance, then + * f_b_g() will return NULL. However when sched_mc={1,2} then + * f_b_g() will select a group from which a running task may be + * pulled to this cpu in order to make the other package idle. + * If there is no opportunity to make a package idle and if + * there are no imbalance, then f_b_g() will return NULL and no + * action will be taken in load_balance_newidle(). + * + * Under normal task pull operation due to imbalance, there + * will be more than one task in the source run queue and + * move_tasks() will succeed. ld_moved will be true and this + * active balance code will not be triggered. + */ + + /* Lock busiest in correct order while this_rq is held */ + double_lock_balance(this_rq, busiest); + + /* + * don't kick the migration_thread, if the curr + * task on busiest cpu can't be moved to this_cpu + */ + if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { + double_unlock_balance(this_rq, busiest); + all_pinned = 1; + return ld_moved; + } + + if (!busiest->active_balance) { + busiest->active_balance = 1; + busiest->push_cpu = this_cpu; + active_balance = 1; + } + + double_unlock_balance(this_rq, busiest); + if (active_balance) + wake_up_process(busiest->migration_thread); + } else sd->nr_balance_failed = 0; @@ -3696,7 +3756,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; unsigned long next_balance = jiffies + HZ; - cpumask_t tmpmask; + cpumask_var_t tmpmask; + + if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) + return; for_each_domain(this_cpu, sd) { unsigned long interval; @@ -3707,7 +3770,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, this_rq, - sd, &tmpmask); + sd, tmpmask); interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) @@ -3722,6 +3785,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) */ this_rq->next_balance = next_balance; } + free_cpumask_var(tmpmask); } /* @@ -3759,7 +3823,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* Search for an sd spanning us and the target CPU. */ for_each_domain(target_cpu, sd) { if ((sd->flags & SD_LOAD_BALANCE) && - cpu_isset(busiest_cpu, sd->span)) + cpumask_test_cpu(busiest_cpu, sched_domain_span(sd))) break; } @@ -3778,10 +3842,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) #ifdef CONFIG_NO_HZ static struct { atomic_t load_balancer; - cpumask_t cpu_mask; + cpumask_var_t cpu_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), - .cpu_mask = CPU_MASK_NONE, }; /* @@ -3809,7 +3872,7 @@ int select_nohz_load_balancer(int stop_tick) int cpu = smp_processor_id(); if (stop_tick) { - cpu_set(cpu, nohz.cpu_mask); + cpumask_set_cpu(cpu, nohz.cpu_mask); cpu_rq(cpu)->in_nohz_recently = 1; /* @@ -3823,7 +3886,7 @@ int select_nohz_load_balancer(int stop_tick) } /* time for ilb owner also to sleep */ - if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { + if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { if (atomic_read(&nohz.load_balancer) == cpu) atomic_set(&nohz.load_balancer, -1); return 0; @@ -3836,10 +3899,10 @@ int select_nohz_load_balancer(int stop_tick) } else if (atomic_read(&nohz.load_balancer) == cpu) return 1; } else { - if (!cpu_isset(cpu, nohz.cpu_mask)) + if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) return 0; - cpu_clear(cpu, nohz.cpu_mask); + cpumask_clear_cpu(cpu, nohz.cpu_mask); if (atomic_read(&nohz.load_balancer) == cpu) if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) @@ -3867,7 +3930,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) unsigned long next_balance = jiffies + 60*HZ; int update_next_balance = 0; int need_serialize; - cpumask_t tmp; + cpumask_var_t tmp; + + /* Fails alloc? Rebalancing probably not a priority right now. */ + if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) + return; for_each_domain(cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -3892,7 +3959,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { + if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -3926,6 +3993,8 @@ out: */ if (likely(update_next_balance)) rq->next_balance = next_balance; + + free_cpumask_var(tmp); } /* @@ -3950,12 +4019,13 @@ static void run_rebalance_domains(struct softirq_action *h) */ if (this_rq->idle_at_tick && atomic_read(&nohz.load_balancer) == this_cpu) { - cpumask_t cpus = nohz.cpu_mask; struct rq *rq; int balance_cpu; - cpu_clear(this_cpu, cpus); - for_each_cpu_mask_nr(balance_cpu, cpus) { + for_each_cpu(balance_cpu, nohz.cpu_mask) { + if (balance_cpu == this_cpu) + continue; + /* * If this cpu gets work to do, stop the load balancing * work being done for other cpus. Next load @@ -3993,7 +4063,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) rq->in_nohz_recently = 0; if (atomic_read(&nohz.load_balancer) == cpu) { - cpu_clear(cpu, nohz.cpu_mask); + cpumask_clear_cpu(cpu, nohz.cpu_mask); atomic_set(&nohz.load_balancer, -1); } @@ -4006,7 +4076,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * TBD: Traverse the sched domains and nominate * the nearest cpu in the nohz.cpu_mask. */ - int ilb = first_cpu(nohz.cpu_mask); + int ilb = cpumask_first(nohz.cpu_mask); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -4018,7 +4088,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * cpus with ticks stopped, is it time for that to stop? */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && - cpus_weight(nohz.cpu_mask) == num_online_cpus()) { + cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { resched_cpu(cpu); return; } @@ -4028,7 +4098,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) * someone else, then no need raise the SCHED_SOFTIRQ */ if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && - cpu_isset(cpu, nohz.cpu_mask)) + cpumask_test_cpu(cpu, nohz.cpu_mask)) return; #endif if (time_after_eq(jiffies, rq->next_balance)) @@ -4080,13 +4150,17 @@ unsigned long long task_delta_exec(struct task_struct *p) * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ -void account_user_time(struct task_struct *p, cputime_t cputime) +void account_user_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; cputime64_t tmp; + /* Add user time to process. */ p->utime = cputime_add(p->utime, cputime); + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); account_group_user_time(p, cputime); /* Add user time to cpustat. */ @@ -4103,51 +4177,48 @@ void account_user_time(struct task_struct *p, cputime_t cputime) * Account guest cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ -static void account_guest_time(struct task_struct *p, cputime_t cputime) +static void account_guest_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) { cputime64_t tmp; struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; tmp = cputime_to_cputime64(cputime); + /* Add guest time to process. */ p->utime = cputime_add(p->utime, cputime); + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); account_group_user_time(p, cputime); p->gtime = cputime_add(p->gtime, cputime); + /* Add guest time to cpustat. */ cpustat->user = cputime64_add(cpustat->user, tmp); cpustat->guest = cputime64_add(cpustat->guest, tmp); } /* - * Account scaled user cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in user space since the last update - */ -void account_user_time_scaled(struct task_struct *p, cputime_t cputime) -{ - p->utimescaled = cputime_add(p->utimescaled, cputime); -} - -/* * Account system cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the cpu time spent in kernel space since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ void account_system_time(struct task_struct *p, int hardirq_offset, - cputime_t cputime) + cputime_t cputime, cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - struct rq *rq = this_rq(); cputime64_t tmp; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { - account_guest_time(p, cputime); + account_guest_time(p, cputime, cputime_scaled); return; } + /* Add system time to process. */ p->stime = cputime_add(p->stime, cputime); + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); account_group_system_time(p, cputime); /* Add system time to cpustat. */ @@ -4156,49 +4227,85 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count()) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); - else if (p != rq->idle) - cpustat->system = cputime64_add(cpustat->system, tmp); - else if (atomic_read(&rq->nr_iowait) > 0) - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else - cpustat->idle = cputime64_add(cpustat->idle, tmp); + cpustat->system = cputime64_add(cpustat->system, tmp); + /* Account for system time used */ acct_update_integrals(p); } /* - * Account scaled system cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @hardirq_offset: the offset to subtract from hardirq_count() - * @cputime: the cpu time spent in kernel space since the last update + * Account for involuntary wait time. + * @steal: the cpu time spent in involuntary wait */ -void account_system_time_scaled(struct task_struct *p, cputime_t cputime) +void account_steal_time(cputime_t cputime) { - p->stimescaled = cputime_add(p->stimescaled, cputime); + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + cputime64_t cputime64 = cputime_to_cputime64(cputime); + + cpustat->steal = cputime64_add(cpustat->steal, cputime64); } /* - * Account for involuntary wait time. - * @p: the process from which the cpu time has been stolen - * @steal: the cpu time spent in involuntary wait + * Account for idle time. + * @cputime: the cpu time spent in idle wait */ -void account_steal_time(struct task_struct *p, cputime_t steal) +void account_idle_time(cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - cputime64_t tmp = cputime_to_cputime64(steal); + cputime64_t cputime64 = cputime_to_cputime64(cputime); struct rq *rq = this_rq(); - if (p == rq->idle) { - p->stime = cputime_add(p->stime, steal); - if (atomic_read(&rq->nr_iowait) > 0) - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); - else - cpustat->idle = cputime64_add(cpustat->idle, tmp); - } else - cpustat->steal = cputime64_add(cpustat->steal, tmp); + if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait = cputime64_add(cpustat->iowait, cputime64); + else + cpustat->idle = cputime64_add(cpustat->idle, cputime64); +} + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + +/* + * Account a single tick of cpu time. + * @p: the process that the cpu time gets accounted to + * @user_tick: indicates if the tick is a user or a system tick + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + cputime_t one_jiffy = jiffies_to_cputime(1); + cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); + struct rq *rq = this_rq(); + + if (user_tick) + account_user_time(p, one_jiffy, one_jiffy_scaled); + else if (p != rq->idle) + account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + one_jiffy_scaled); + else + account_idle_time(one_jiffy); +} + +/* + * Account multiple ticks of steal time. + * @p: the process from which the cpu time has been stolen + * @ticks: number of stolen ticks + */ +void account_steal_ticks(unsigned long ticks) +{ + account_steal_time(jiffies_to_cputime(ticks)); } /* + * Account multiple ticks of idle time. + * @ticks: number of stolen ticks + */ +void account_idle_ticks(unsigned long ticks) +{ + account_idle_time(jiffies_to_cputime(ticks)); +} + +#endif + +/* * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING @@ -5401,10 +5508,9 @@ out_unlock: return retval; } -long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) +long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) { - cpumask_t cpus_allowed; - cpumask_t new_mask = *in_mask; + cpumask_var_t cpus_allowed, new_mask; struct task_struct *p; int retval; @@ -5426,6 +5532,14 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); + if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_put_task; + } + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_free_cpus_allowed; + } retval = -EPERM; if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; @@ -5434,37 +5548,41 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) if (retval) goto out_unlock; - cpuset_cpus_allowed(p, &cpus_allowed); - cpus_and(new_mask, new_mask, cpus_allowed); + cpuset_cpus_allowed(p, cpus_allowed); + cpumask_and(new_mask, in_mask, cpus_allowed); again: - retval = set_cpus_allowed_ptr(p, &new_mask); + retval = set_cpus_allowed_ptr(p, new_mask); if (!retval) { - cpuset_cpus_allowed(p, &cpus_allowed); - if (!cpus_subset(new_mask, cpus_allowed)) { + cpuset_cpus_allowed(p, cpus_allowed); + if (!cpumask_subset(new_mask, cpus_allowed)) { /* * We must have raced with a concurrent cpuset * update. Just reset the cpus_allowed to the * cpuset's cpus_allowed */ - new_mask = cpus_allowed; + cpumask_copy(new_mask, cpus_allowed); goto again; } } out_unlock: + free_cpumask_var(new_mask); +out_free_cpus_allowed: + free_cpumask_var(cpus_allowed); +out_put_task: put_task_struct(p); put_online_cpus(); return retval; } static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, - cpumask_t *new_mask) + struct cpumask *new_mask) { - if (len < sizeof(cpumask_t)) { - memset(new_mask, 0, sizeof(cpumask_t)); - } else if (len > sizeof(cpumask_t)) { - len = sizeof(cpumask_t); - } + if (len < cpumask_size()) + cpumask_clear(new_mask); + else if (len > cpumask_size()) + len = cpumask_size(); + return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; } @@ -5477,17 +5595,20 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { - cpumask_t new_mask; + cpumask_var_t new_mask; int retval; - retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); - if (retval) - return retval; + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) + return -ENOMEM; - return sched_setaffinity(pid, &new_mask); + retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); + if (retval == 0) + retval = sched_setaffinity(pid, new_mask); + free_cpumask_var(new_mask); + return retval; } -long sched_getaffinity(pid_t pid, cpumask_t *mask) +long sched_getaffinity(pid_t pid, struct cpumask *mask) { struct task_struct *p; int retval; @@ -5504,7 +5625,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) if (retval) goto out_unlock; - cpus_and(*mask, p->cpus_allowed, cpu_online_map); + cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); out_unlock: read_unlock(&tasklist_lock); @@ -5523,19 +5644,24 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long __user *user_mask_ptr) { int ret; - cpumask_t mask; + cpumask_var_t mask; - if (len < sizeof(cpumask_t)) + if (len < cpumask_size()) return -EINVAL; - ret = sched_getaffinity(pid, &mask); - if (ret < 0) - return ret; + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; - if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) - return -EFAULT; + ret = sched_getaffinity(pid, mask); + if (ret == 0) { + if (copy_to_user(user_mask_ptr, mask, cpumask_size())) + ret = -EFAULT; + else + ret = cpumask_size(); + } + free_cpumask_var(mask); - return sizeof(cpumask_t); + return ret; } /** @@ -5877,7 +6003,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->prio = idle->normal_prio = MAX_PRIO; - idle->cpus_allowed = cpumask_of_cpu(cpu); + cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); __set_task_cpu(idle, cpu); rq->curr = rq->idle = idle; @@ -5904,9 +6030,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * indicates which cpus entered this state. This is used * in the rcu update to wait only for active cpus. For system * which do not switch off the HZ timer nohz_cpu_mask should - * always be CPU_MASK_NONE. + * always be CPU_BITS_NONE. */ -cpumask_t nohz_cpu_mask = CPU_MASK_NONE; +cpumask_var_t nohz_cpu_mask; /* * Increase the granularity value when there are more CPUs, @@ -5961,7 +6087,7 @@ static inline void sched_init_granularity(void) * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. */ -int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { struct migration_req req; unsigned long flags; @@ -5969,13 +6095,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) int ret = 0; rq = task_rq_lock(p, &flags); - if (!cpus_intersects(*new_mask, cpu_online_map)) { + if (!cpumask_intersects(new_mask, cpu_online_mask)) { ret = -EINVAL; goto out; } if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && - !cpus_equal(p->cpus_allowed, *new_mask))) { + !cpumask_equal(&p->cpus_allowed, new_mask))) { ret = -EINVAL; goto out; } @@ -5983,15 +6109,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) if (p->sched_class->set_cpus_allowed) p->sched_class->set_cpus_allowed(p, new_mask); else { - p->cpus_allowed = *new_mask; - p->rt.nr_cpus_allowed = cpus_weight(*new_mask); + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); } /* Can the task run on the task's current CPU? If so, we're done */ - if (cpu_isset(task_cpu(p), *new_mask)) + if (cpumask_test_cpu(task_cpu(p), new_mask)) goto out; - if (migrate_task(p, any_online_cpu(*new_mask), &req)) { + if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -6033,7 +6159,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) if (task_cpu(p) != src_cpu) goto done; /* Affinity changed (again). */ - if (!cpu_isset(dest_cpu, p->cpus_allowed)) + if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) goto fail; on_rq = p->se.on_rq; @@ -6130,50 +6256,43 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) */ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) { - unsigned long flags; - cpumask_t mask; - struct rq *rq; int dest_cpu; + /* FIXME: Use cpumask_of_node here. */ + cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu)); + const struct cpumask *nodemask = &_nodemask; + +again: + /* Look for allowed, online CPU in same node. */ + for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) + if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) + goto move; + + /* Any allowed, online CPU? */ + dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); + if (dest_cpu < nr_cpu_ids) + goto move; + + /* No more Mr. Nice Guy. */ + if (dest_cpu >= nr_cpu_ids) { + cpuset_cpus_allowed_locked(p, &p->cpus_allowed); + dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); - do { - /* On same node? */ - mask = node_to_cpumask(cpu_to_node(dead_cpu)); - cpus_and(mask, mask, p->cpus_allowed); - dest_cpu = any_online_cpu(mask); - - /* On any allowed CPU? */ - if (dest_cpu >= nr_cpu_ids) - dest_cpu = any_online_cpu(p->cpus_allowed); - - /* No more Mr. Nice Guy. */ - if (dest_cpu >= nr_cpu_ids) { - cpumask_t cpus_allowed; - - cpuset_cpus_allowed_locked(p, &cpus_allowed); - /* - * Try to stay on the same cpuset, where the - * current cpuset may be a subset of all cpus. - * The cpuset_cpus_allowed_locked() variant of - * cpuset_cpus_allowed() will not block. It must be - * called within calls to cpuset_lock/cpuset_unlock. - */ - rq = task_rq_lock(p, &flags); - p->cpus_allowed = cpus_allowed; - dest_cpu = any_online_cpu(p->cpus_allowed); - task_rq_unlock(rq, &flags); - - /* - * Don't tell them about moving exiting tasks or - * kernel threads (both mm NULL), since they never - * leave kernel. - */ - if (p->mm && printk_ratelimit()) { - printk(KERN_INFO "process %d (%s) no " - "longer affine to cpu%d\n", - task_pid_nr(p), p->comm, dead_cpu); - } + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk(KERN_INFO "process %d (%s) no " + "longer affine to cpu%d\n", + task_pid_nr(p), p->comm, dead_cpu); } - } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); + } + +move: + /* It can have affinity changed while we were choosing. */ + if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) + goto again; } /* @@ -6185,7 +6304,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) */ static void migrate_nr_uninterruptible(struct rq *rq_src) { - struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); + struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); unsigned long flags; local_irq_save(flags); @@ -6475,7 +6594,7 @@ static void set_rq_online(struct rq *rq) if (!rq->online) { const struct sched_class *class; - cpu_set(rq->cpu, rq->rd->online); + cpumask_set_cpu(rq->cpu, rq->rd->online); rq->online = 1; for_each_class(class) { @@ -6495,7 +6614,7 @@ static void set_rq_offline(struct rq *rq) class->rq_offline(rq); } - cpu_clear(rq->cpu, rq->rd->online); + cpumask_clear_cpu(rq->cpu, rq->rd->online); rq->online = 0; } } @@ -6536,7 +6655,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpu_isset(cpu, rq->rd->span)); + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_online(rq); } @@ -6550,7 +6669,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; /* Unbind it from offline cpu so it can run. Fall thru. */ kthread_bind(cpu_rq(cpu)->migration_thread, - any_online_cpu(cpu_online_map)); + cpumask_any(cpu_online_mask)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; @@ -6600,7 +6719,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq = cpu_rq(cpu); spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - BUG_ON(!cpu_isset(cpu, rq->rd->span)); + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); set_rq_offline(rq); } spin_unlock_irqrestore(&rq->lock, flags); @@ -6639,13 +6758,13 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, - cpumask_t *groupmask) + struct cpumask *groupmask) { struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sd->span); - cpus_clear(*groupmask); + cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); + cpumask_clear(groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6659,11 +6778,11 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, printk(KERN_CONT "span %s level %s\n", str, sd->name); - if (!cpu_isset(cpu, sd->span)) { + if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { printk(KERN_ERR "ERROR: domain->span does not contain " "CPU%d\n", cpu); } - if (!cpu_isset(cpu, group->cpumask)) { + if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) { printk(KERN_ERR "ERROR: domain->groups does not contain" " CPU%d\n", cpu); } @@ -6683,31 +6802,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!cpus_weight(group->cpumask)) { + if (!cpumask_weight(sched_group_cpus(group))) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); break; } - if (cpus_intersects(*groupmask, group->cpumask)) { + if (cpumask_intersects(groupmask, sched_group_cpus(group))) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: repeated CPUs\n"); break; } - cpus_or(*groupmask, *groupmask, group->cpumask); + cpumask_or(groupmask, groupmask, sched_group_cpus(group)); - cpulist_scnprintf(str, sizeof(str), group->cpumask); + cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); printk(KERN_CONT " %s", str); group = group->next; } while (group != sd->groups); printk(KERN_CONT "\n"); - if (!cpus_equal(sd->span, *groupmask)) + if (!cpumask_equal(sched_domain_span(sd), groupmask)) printk(KERN_ERR "ERROR: groups don't span domain->span\n"); - if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) + if (sd->parent && + !cpumask_subset(groupmask, sched_domain_span(sd->parent))) printk(KERN_ERR "ERROR: parent span is not a superset " "of domain->span\n"); return 0; @@ -6715,7 +6835,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, static void sched_domain_debug(struct sched_domain *sd, int cpu) { - cpumask_t *groupmask; + cpumask_var_t groupmask; int level = 0; if (!sd) { @@ -6725,8 +6845,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); - groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); - if (!groupmask) { + if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); return; } @@ -6739,7 +6858,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) if (!sd) break; } - kfree(groupmask); + free_cpumask_var(groupmask); } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) @@ -6747,7 +6866,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) static int sd_degenerate(struct sched_domain *sd) { - if (cpus_weight(sd->span) == 1) + if (cpumask_weight(sched_domain_span(sd)) == 1) return 1; /* Following flags need at least 2 groups */ @@ -6778,7 +6897,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) if (sd_degenerate(parent)) return 1; - if (!cpus_equal(sd->span, parent->span)) + if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) return 0; /* Does parent contain flags not in child? */ @@ -6802,6 +6921,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } +static void free_rootdomain(struct root_domain *rd) +{ + cpupri_cleanup(&rd->cpupri); + + free_cpumask_var(rd->rto_mask); + free_cpumask_var(rd->online); + free_cpumask_var(rd->span); + kfree(rd); +} + static void rq_attach_root(struct rq *rq, struct root_domain *rd) { unsigned long flags; @@ -6811,38 +6940,63 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) if (rq->rd) { struct root_domain *old_rd = rq->rd; - if (cpu_isset(rq->cpu, old_rd->online)) + if (cpumask_test_cpu(rq->cpu, old_rd->online)) set_rq_offline(rq); - cpu_clear(rq->cpu, old_rd->span); + cpumask_clear_cpu(rq->cpu, old_rd->span); if (atomic_dec_and_test(&old_rd->refcount)) - kfree(old_rd); + free_rootdomain(old_rd); } atomic_inc(&rd->refcount); rq->rd = rd; - cpu_set(rq->cpu, rd->span); - if (cpu_isset(rq->cpu, cpu_online_map)) + cpumask_set_cpu(rq->cpu, rd->span); + if (cpumask_test_cpu(rq->cpu, cpu_online_mask)) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); } -static void init_rootdomain(struct root_domain *rd) +static int init_rootdomain(struct root_domain *rd, bool bootmem) { memset(rd, 0, sizeof(*rd)); - cpus_clear(rd->span); - cpus_clear(rd->online); + if (bootmem) { + alloc_bootmem_cpumask_var(&def_root_domain.span); + alloc_bootmem_cpumask_var(&def_root_domain.online); + alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); + cpupri_init(&rd->cpupri, true); + return 0; + } + + if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) + goto free_rd; + if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) + goto free_span; + if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) + goto free_online; - cpupri_init(&rd->cpupri); + if (cpupri_init(&rd->cpupri, false) != 0) + goto free_rto_mask; + return 0; + +free_rto_mask: + free_cpumask_var(rd->rto_mask); +free_online: + free_cpumask_var(rd->online); +free_span: + free_cpumask_var(rd->span); +free_rd: + kfree(rd); + return -ENOMEM; } static void init_defrootdomain(void) { - init_rootdomain(&def_root_domain); + init_rootdomain(&def_root_domain, true); + atomic_set(&def_root_domain.refcount, 1); } @@ -6854,7 +7008,10 @@ static struct root_domain *alloc_rootdomain(void) if (!rd) return NULL; - init_rootdomain(rd); + if (init_rootdomain(rd, false) != 0) { + kfree(rd); + return NULL; + } return rd; } @@ -6896,19 +7053,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) } /* cpus with isolated domains */ -static cpumask_t cpu_isolated_map = CPU_MASK_NONE; +static cpumask_var_t cpu_isolated_map; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { - static int __initdata ints[NR_CPUS]; - int i; - - str = get_options(str, ARRAY_SIZE(ints), ints); - cpus_clear(cpu_isolated_map); - for (i = 1; i <= ints[0]; i++) - if (ints[i] < NR_CPUS) - cpu_set(ints[i], cpu_isolated_map); + cpulist_parse(str, cpu_isolated_map); return 1; } @@ -6917,42 +7067,43 @@ __setup("isolcpus=", isolated_cpu_setup); /* * init_sched_build_groups takes the cpumask we wish to span, and a pointer * to a function which identifies what group(along with sched group) a CPU - * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS - * (due to the fact that we keep track of groups covered with a cpumask_t). + * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids + * (due to the fact that we keep track of groups covered with a struct cpumask). * * init_sched_build_groups will build a circular linked list of the groups * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ static void -init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, - int (*group_fn)(int cpu, const cpumask_t *cpu_map, +init_sched_build_groups(const struct cpumask *span, + const struct cpumask *cpu_map, + int (*group_fn)(int cpu, const struct cpumask *cpu_map, struct sched_group **sg, - cpumask_t *tmpmask), - cpumask_t *covered, cpumask_t *tmpmask) + struct cpumask *tmpmask), + struct cpumask *covered, struct cpumask *tmpmask) { struct sched_group *first = NULL, *last = NULL; int i; - cpus_clear(*covered); + cpumask_clear(covered); - for_each_cpu_mask_nr(i, *span) { + for_each_cpu(i, span) { struct sched_group *sg; int group = group_fn(i, cpu_map, &sg, tmpmask); int j; - if (cpu_isset(i, *covered)) + if (cpumask_test_cpu(i, covered)) continue; - cpus_clear(sg->cpumask); + cpumask_clear(sched_group_cpus(sg)); sg->__cpu_power = 0; - for_each_cpu_mask_nr(j, *span) { + for_each_cpu(j, span) { if (group_fn(j, cpu_map, NULL, tmpmask) != group) continue; - cpu_set(j, *covered); - cpu_set(j, sg->cpumask); + cpumask_set_cpu(j, covered); + cpumask_set_cpu(j, sched_group_cpus(sg)); } if (!first) first = sg; @@ -7016,9 +7167,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) * should be one that prevents unnecessary balancing, but also spreads tasks * out optimally. */ -static void sched_domain_node_span(int node, cpumask_t *span) +static void sched_domain_node_span(int node, struct cpumask *span) { nodemask_t used_nodes; + /* FIXME: use cpumask_of_node() */ node_to_cpumask_ptr(nodemask, node); int i; @@ -7040,18 +7192,33 @@ static void sched_domain_node_span(int node, cpumask_t *span) int sched_smt_power_savings = 0, sched_mc_power_savings = 0; /* + * The cpus mask in sched_group and sched_domain hangs off the end. + * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space + * for nr_cpu_ids < CONFIG_NR_CPUS. + */ +struct static_sched_group { + struct sched_group sg; + DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); +}; + +struct static_sched_domain { + struct sched_domain sd; + DECLARE_BITMAP(span, CONFIG_NR_CPUS); +}; + +/* * SMT sched-domains: */ #ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); +static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); static int -cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - cpumask_t *unused) +cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *unused) { if (sg) - *sg = &per_cpu(sched_group_cpus, cpu); + *sg = &per_cpu(sched_group_cpus, cpu).sg; return cpu; } #endif /* CONFIG_SCHED_SMT */ @@ -7060,56 +7227,55 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, * multi-core sched-domains: */ #ifdef CONFIG_SCHED_MC -static DEFINE_PER_CPU(struct sched_domain, core_domains); -static DEFINE_PER_CPU(struct sched_group, sched_group_core); +static DEFINE_PER_CPU(struct static_sched_domain, core_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); #endif /* CONFIG_SCHED_MC */ #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) static int -cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - cpumask_t *mask) +cpu_to_core_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) { int group; - *mask = per_cpu(cpu_sibling_map, cpu); - cpus_and(*mask, *mask, *cpu_map); - group = first_cpu(*mask); + cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); + group = cpumask_first(mask); if (sg) - *sg = &per_cpu(sched_group_core, group); + *sg = &per_cpu(sched_group_core, group).sg; return group; } #elif defined(CONFIG_SCHED_MC) static int -cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - cpumask_t *unused) +cpu_to_core_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *unused) { if (sg) - *sg = &per_cpu(sched_group_core, cpu); + *sg = &per_cpu(sched_group_core, cpu).sg; return cpu; } #endif -static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static DEFINE_PER_CPU(struct sched_group, sched_group_phys); +static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); static int -cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, - cpumask_t *mask) +cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, struct cpumask *mask) { int group; #ifdef CONFIG_SCHED_MC + /* FIXME: Use cpu_coregroup_mask. */ *mask = cpu_coregroup_map(cpu); cpus_and(*mask, *mask, *cpu_map); - group = first_cpu(*mask); + group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) - *mask = per_cpu(cpu_sibling_map, cpu); - cpus_and(*mask, *mask, *cpu_map); - group = first_cpu(*mask); + cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); + group = cpumask_first(mask); #else group = cpu; #endif if (sg) - *sg = &per_cpu(sched_group_phys, group); + *sg = &per_cpu(sched_group_phys, group).sg; return group; } @@ -7123,19 +7289,21 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains); static struct sched_group ***sched_group_nodes_bycpu; static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); +static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); -static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, - struct sched_group **sg, cpumask_t *nodemask) +static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, + struct sched_group **sg, + struct cpumask *nodemask) { int group; + /* FIXME: use cpumask_of_node */ + node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu)); - *nodemask = node_to_cpumask(cpu_to_node(cpu)); - cpus_and(*nodemask, *nodemask, *cpu_map); - group = first_cpu(*nodemask); + cpumask_and(nodemask, pnodemask, cpu_map); + group = cpumask_first(nodemask); if (sg) - *sg = &per_cpu(sched_group_allnodes, group); + *sg = &per_cpu(sched_group_allnodes, group).sg; return group; } @@ -7147,11 +7315,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) if (!sg) return; do { - for_each_cpu_mask_nr(j, sg->cpumask) { + for_each_cpu(j, sched_group_cpus(sg)) { struct sched_domain *sd; - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { + sd = &per_cpu(phys_domains, j).sd; + if (j != cpumask_first(sched_group_cpus(sd->groups))) { /* * Only add "power" once for each * physical package. @@ -7168,11 +7336,12 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) #ifdef CONFIG_NUMA /* Free memory allocated for various sched_group structures */ -static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) +static void free_sched_groups(const struct cpumask *cpu_map, + struct cpumask *nodemask) { int cpu, i; - for_each_cpu_mask_nr(cpu, *cpu_map) { + for_each_cpu(cpu, cpu_map) { struct sched_group **sched_group_nodes = sched_group_nodes_bycpu[cpu]; @@ -7181,10 +7350,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) for (i = 0; i < nr_node_ids; i++) { struct sched_group *oldsg, *sg = sched_group_nodes[i]; + /* FIXME: Use cpumask_of_node */ + node_to_cpumask_ptr(pnodemask, i); - *nodemask = node_to_cpumask(i); - cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpus_empty(*nodemask)) + cpus_and(*nodemask, *pnodemask, *cpu_map); + if (cpumask_empty(nodemask)) continue; if (sg == NULL) @@ -7202,7 +7372,8 @@ next_sg: } } #else /* !CONFIG_NUMA */ -static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) +static void free_sched_groups(const struct cpumask *cpu_map, + struct cpumask *nodemask) { } #endif /* CONFIG_NUMA */ @@ -7228,7 +7399,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != first_cpu(sd->groups->cpumask)) + if (cpu != cpumask_first(sched_group_cpus(sd->groups))) return; child = sd->child; @@ -7293,48 +7464,6 @@ SD_INIT_FUNC(CPU) SD_INIT_FUNC(MC) #endif -/* - * To minimize stack usage kmalloc room for cpumasks and share the - * space as the usage in build_sched_domains() dictates. Used only - * if the amount of space is significant. - */ -struct allmasks { - cpumask_t tmpmask; /* make this one first */ - union { - cpumask_t nodemask; - cpumask_t this_sibling_map; - cpumask_t this_core_map; - }; - cpumask_t send_covered; - -#ifdef CONFIG_NUMA - cpumask_t domainspan; - cpumask_t covered; - cpumask_t notcovered; -#endif -}; - -#if NR_CPUS > 128 -#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v -static inline void sched_cpumask_alloc(struct allmasks **masks) -{ - *masks = kmalloc(sizeof(**masks), GFP_KERNEL); -} -static inline void sched_cpumask_free(struct allmasks *masks) -{ - kfree(masks); -} -#else -#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v -static inline void sched_cpumask_alloc(struct allmasks **masks) -{ } -static inline void sched_cpumask_free(struct allmasks *masks) -{ } -#endif - -#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ - ((unsigned long)(a) + offsetof(struct allmasks, v)) - static int default_relax_domain_level = -1; static int __init setup_relax_domain_level(char *str) @@ -7374,17 +7503,38 @@ static void set_domain_attribute(struct sched_domain *sd, * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static int __build_sched_domains(const cpumask_t *cpu_map, +static int __build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr) { - int i; + int i, err = -ENOMEM; struct root_domain *rd; - SCHED_CPUMASK_DECLARE(allmasks); - cpumask_t *tmpmask; + cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered, + tmpmask; #ifdef CONFIG_NUMA + cpumask_var_t domainspan, covered, notcovered; struct sched_group **sched_group_nodes = NULL; int sd_allnodes = 0; + if (!alloc_cpumask_var(&domainspan, GFP_KERNEL)) + goto out; + if (!alloc_cpumask_var(&covered, GFP_KERNEL)) + goto free_domainspan; + if (!alloc_cpumask_var(¬covered, GFP_KERNEL)) + goto free_covered; +#endif + + if (!alloc_cpumask_var(&nodemask, GFP_KERNEL)) + goto free_notcovered; + if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL)) + goto free_nodemask; + if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL)) + goto free_this_sibling_map; + if (!alloc_cpumask_var(&send_covered, GFP_KERNEL)) + goto free_this_core_map; + if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + goto free_send_covered; + +#ifdef CONFIG_NUMA /* * Allocate the per-node list of sched groups */ @@ -7392,54 +7542,37 @@ static int __build_sched_domains(const cpumask_t *cpu_map, GFP_KERNEL); if (!sched_group_nodes) { printk(KERN_WARNING "Can not alloc sched group node list\n"); - return -ENOMEM; + goto free_tmpmask; } #endif rd = alloc_rootdomain(); if (!rd) { printk(KERN_WARNING "Cannot alloc root domain\n"); -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - return -ENOMEM; - } - - /* get space for all scratch cpumask variables */ - sched_cpumask_alloc(&allmasks); - if (!allmasks) { - printk(KERN_WARNING "Cannot alloc cpumask array\n"); - kfree(rd); -#ifdef CONFIG_NUMA - kfree(sched_group_nodes); -#endif - return -ENOMEM; + goto free_sched_groups; } - tmpmask = (cpumask_t *)allmasks; - - #ifdef CONFIG_NUMA - sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; + sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes; #endif /* * Set up domains for cpus specified by the cpu_map. */ - for_each_cpu_mask_nr(i, *cpu_map) { + for_each_cpu(i, cpu_map) { struct sched_domain *sd = NULL, *p; - SCHED_CPUMASK_VAR(nodemask, allmasks); + /* FIXME: use cpumask_of_node */ *nodemask = node_to_cpumask(cpu_to_node(i)); cpus_and(*nodemask, *nodemask, *cpu_map); #ifdef CONFIG_NUMA - if (cpus_weight(*cpu_map) > - SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { + if (cpumask_weight(cpu_map) > + SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) { sd = &per_cpu(allnodes_domains, i); SD_INIT(sd, ALLNODES); set_domain_attribute(sd, attr); - sd->span = *cpu_map; + cpumask_copy(sched_domain_span(sd), cpu_map); cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); p = sd; sd_allnodes = 1; @@ -7449,18 +7582,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map, sd = &per_cpu(node_domains, i); SD_INIT(sd, NODE); set_domain_attribute(sd, attr); - sched_domain_node_span(cpu_to_node(i), &sd->span); + sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); sd->parent = p; if (p) p->child = sd; - cpus_and(sd->span, sd->span, *cpu_map); + cpumask_and(sched_domain_span(sd), + sched_domain_span(sd), cpu_map); #endif p = sd; - sd = &per_cpu(phys_domains, i); + sd = &per_cpu(phys_domains, i).sd; SD_INIT(sd, CPU); set_domain_attribute(sd, attr); - sd->span = *nodemask; + cpumask_copy(sched_domain_span(sd), nodemask); sd->parent = p; if (p) p->child = sd; @@ -7468,11 +7602,12 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_MC p = sd; - sd = &per_cpu(core_domains, i); + sd = &per_cpu(core_domains, i).sd; SD_INIT(sd, MC); set_domain_attribute(sd, attr); - sd->span = cpu_coregroup_map(i); - cpus_and(sd->span, sd->span, *cpu_map); + *sched_domain_span(sd) = cpu_coregroup_map(i); + cpumask_and(sched_domain_span(sd), + sched_domain_span(sd), cpu_map); sd->parent = p; p->child = sd; cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); @@ -7480,11 +7615,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_SMT p = sd; - sd = &per_cpu(cpu_domains, i); + sd = &per_cpu(cpu_domains, i).sd; SD_INIT(sd, SIBLING); set_domain_attribute(sd, attr); - sd->span = per_cpu(cpu_sibling_map, i); - cpus_and(sd->span, sd->span, *cpu_map); + cpumask_and(sched_domain_span(sd), + &per_cpu(cpu_sibling_map, i), cpu_map); sd->parent = p; p->child = sd; cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); @@ -7493,13 +7628,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_cpu_mask_nr(i, *cpu_map) { - SCHED_CPUMASK_VAR(this_sibling_map, allmasks); - SCHED_CPUMASK_VAR(send_covered, allmasks); - - *this_sibling_map = per_cpu(cpu_sibling_map, i); - cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map); - if (i != first_cpu(*this_sibling_map)) + for_each_cpu(i, cpu_map) { + cpumask_and(this_sibling_map, + &per_cpu(cpu_sibling_map, i), cpu_map); + if (i != cpumask_first(this_sibling_map)) continue; init_sched_build_groups(this_sibling_map, cpu_map, @@ -7510,13 +7642,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_SCHED_MC /* Set up multi-core groups */ - for_each_cpu_mask_nr(i, *cpu_map) { - SCHED_CPUMASK_VAR(this_core_map, allmasks); - SCHED_CPUMASK_VAR(send_covered, allmasks); - + for_each_cpu(i, cpu_map) { + /* FIXME: Use cpu_coregroup_mask */ *this_core_map = cpu_coregroup_map(i); cpus_and(*this_core_map, *this_core_map, *cpu_map); - if (i != first_cpu(*this_core_map)) + if (i != cpumask_first(this_core_map)) continue; init_sched_build_groups(this_core_map, cpu_map, @@ -7527,12 +7657,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* Set up physical groups */ for (i = 0; i < nr_node_ids; i++) { - SCHED_CPUMASK_VAR(nodemask, allmasks); - SCHED_CPUMASK_VAR(send_covered, allmasks); - + /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpus_empty(*nodemask)) + if (cpumask_empty(nodemask)) continue; init_sched_build_groups(nodemask, cpu_map, @@ -7543,8 +7671,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, #ifdef CONFIG_NUMA /* Set up node groups */ if (sd_allnodes) { - SCHED_CPUMASK_VAR(send_covered, allmasks); - init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, send_covered, tmpmask); @@ -7553,58 +7679,58 @@ static int __build_sched_domains(const cpumask_t *cpu_map, for (i = 0; i < nr_node_ids; i++) { /* Set up node groups */ struct sched_group *sg, *prev; - SCHED_CPUMASK_VAR(nodemask, allmasks); - SCHED_CPUMASK_VAR(domainspan, allmasks); - SCHED_CPUMASK_VAR(covered, allmasks); int j; + /* FIXME: Use cpumask_of_node */ *nodemask = node_to_cpumask(i); - cpus_clear(*covered); + cpumask_clear(covered); cpus_and(*nodemask, *nodemask, *cpu_map); - if (cpus_empty(*nodemask)) { + if (cpumask_empty(nodemask)) { sched_group_nodes[i] = NULL; continue; } sched_domain_node_span(i, domainspan); - cpus_and(*domainspan, *domainspan, *cpu_map); + cpumask_and(domainspan, domainspan, cpu_map); - sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); + sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), + GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING "Can not alloc domain group for " "node %d\n", i); goto error; } sched_group_nodes[i] = sg; - for_each_cpu_mask_nr(j, *nodemask) { + for_each_cpu(j, nodemask) { struct sched_domain *sd; sd = &per_cpu(node_domains, j); sd->groups = sg; } sg->__cpu_power = 0; - sg->cpumask = *nodemask; + cpumask_copy(sched_group_cpus(sg), nodemask); sg->next = sg; - cpus_or(*covered, *covered, *nodemask); + cpumask_or(covered, covered, nodemask); prev = sg; for (j = 0; j < nr_node_ids; j++) { - SCHED_CPUMASK_VAR(notcovered, allmasks); int n = (i + j) % nr_node_ids; + /* FIXME: Use cpumask_of_node */ node_to_cpumask_ptr(pnodemask, n); - cpus_complement(*notcovered, *covered); - cpus_and(*tmpmask, *notcovered, *cpu_map); - cpus_and(*tmpmask, *tmpmask, *domainspan); - if (cpus_empty(*tmpmask)) + cpumask_complement(notcovered, covered); + cpumask_and(tmpmask, notcovered, cpu_map); + cpumask_and(tmpmask, tmpmask, domainspan); + if (cpumask_empty(tmpmask)) break; - cpus_and(*tmpmask, *tmpmask, *pnodemask); - if (cpus_empty(*tmpmask)) + cpumask_and(tmpmask, tmpmask, pnodemask); + if (cpumask_empty(tmpmask)) continue; - sg = kmalloc_node(sizeof(struct sched_group), + sg = kmalloc_node(sizeof(struct sched_group) + + cpumask_size(), GFP_KERNEL, i); if (!sg) { printk(KERN_WARNING @@ -7612,9 +7738,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map, goto error; } sg->__cpu_power = 0; - sg->cpumask = *tmpmask; + cpumask_copy(sched_group_cpus(sg), tmpmask); sg->next = prev->next; - cpus_or(*covered, *covered, *tmpmask); + cpumask_or(covered, covered, tmpmask); prev->next = sg; prev = sg; } @@ -7623,22 +7749,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map, /* Calculate CPU power for physical packages and nodes */ #ifdef CONFIG_SCHED_SMT - for_each_cpu_mask_nr(i, *cpu_map) { - struct sched_domain *sd = &per_cpu(cpu_domains, i); + for_each_cpu(i, cpu_map) { + struct sched_domain *sd = &per_cpu(cpu_domains, i).sd; init_sched_groups_power(i, sd); } #endif #ifdef CONFIG_SCHED_MC - for_each_cpu_mask_nr(i, *cpu_map) { - struct sched_domain *sd = &per_cpu(core_domains, i); + for_each_cpu(i, cpu_map) { + struct sched_domain *sd = &per_cpu(core_domains, i).sd; init_sched_groups_power(i, sd); } #endif - for_each_cpu_mask_nr(i, *cpu_map) { - struct sched_domain *sd = &per_cpu(phys_domains, i); + for_each_cpu(i, cpu_map) { + struct sched_domain *sd = &per_cpu(phys_domains, i).sd; init_sched_groups_power(i, sd); } @@ -7650,53 +7776,78 @@ static int __build_sched_domains(const cpumask_t *cpu_map, if (sd_allnodes) { struct sched_group *sg; - cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, + cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, tmpmask); init_numa_sched_groups_power(sg); } #endif /* Attach the domains */ - for_each_cpu_mask_nr(i, *cpu_map) { + for_each_cpu(i, cpu_map) { struct sched_domain *sd; #ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); + sd = &per_cpu(cpu_domains, i).sd; #elif defined(CONFIG_SCHED_MC) - sd = &per_cpu(core_domains, i); + sd = &per_cpu(core_domains, i).sd; #else - sd = &per_cpu(phys_domains, i); + sd = &per_cpu(phys_domains, i).sd; #endif cpu_attach_domain(sd, rd, i); } - sched_cpumask_free(allmasks); - return 0; + err = 0; + +free_tmpmask: + free_cpumask_var(tmpmask); +free_send_covered: + free_cpumask_var(send_covered); +free_this_core_map: + free_cpumask_var(this_core_map); +free_this_sibling_map: + free_cpumask_var(this_sibling_map); +free_nodemask: + free_cpumask_var(nodemask); +free_notcovered: +#ifdef CONFIG_NUMA + free_cpumask_var(notcovered); +free_covered: + free_cpumask_var(covered); +free_domainspan: + free_cpumask_var(domainspan); +out: +#endif + return err; + +free_sched_groups: +#ifdef CONFIG_NUMA + kfree(sched_group_nodes); +#endif + goto free_tmpmask; #ifdef CONFIG_NUMA error: free_sched_groups(cpu_map, tmpmask); - sched_cpumask_free(allmasks); - kfree(rd); - return -ENOMEM; + free_rootdomain(rd); + goto free_tmpmask; #endif } -static int build_sched_domains(const cpumask_t *cpu_map) +static int build_sched_domains(const struct cpumask *cpu_map) { return __build_sched_domains(cpu_map, NULL); } -static cpumask_t *doms_cur; /* current sched domains */ +static struct cpumask *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ /* * Special case: If a kmalloc of a doms_cur partition (array of - * cpumask_t) fails, then fallback to a single sched domain, - * as determined by the single cpumask_t fallback_doms. + * cpumask) fails, then fallback to a single sched domain, + * as determined by the single cpumask fallback_doms. */ -static cpumask_t fallback_doms; +static cpumask_var_t fallback_doms; /* * arch_update_cpu_topology lets virtualized architectures update the @@ -7713,16 +7864,16 @@ int __attribute__((weak)) arch_update_cpu_topology(void) * For now this just excludes isolated cpus, but could be used to * exclude other special cases in the future. */ -static int arch_init_sched_domains(const cpumask_t *cpu_map) +static int arch_init_sched_domains(const struct cpumask *cpu_map) { int err; arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); + doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); if (!doms_cur) - doms_cur = &fallback_doms; - cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); + doms_cur = fallback_doms; + cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); dattr_cur = NULL; err = build_sched_domains(doms_cur); register_sched_domain_sysctl(); @@ -7730,8 +7881,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map) return err; } -static void arch_destroy_sched_domains(const cpumask_t *cpu_map, - cpumask_t *tmpmask) +static void arch_destroy_sched_domains(const struct cpumask *cpu_map, + struct cpumask *tmpmask) { free_sched_groups(cpu_map, tmpmask); } @@ -7740,15 +7891,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map, * Detach sched domains from a group of cpus specified in cpu_map * These cpus will now be attached to the NULL domain */ -static void detach_destroy_domains(const cpumask_t *cpu_map) +static void detach_destroy_domains(const struct cpumask *cpu_map) { - cpumask_t tmpmask; + /* Save because hotplug lock held. */ + static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); int i; - for_each_cpu_mask_nr(i, *cpu_map) + for_each_cpu(i, cpu_map) cpu_attach_domain(NULL, &def_root_domain, i); synchronize_sched(); - arch_destroy_sched_domains(cpu_map, &tmpmask); + arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); } /* handle null as "default" */ @@ -7773,7 +7925,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the @@ -7787,13 +7939,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * the single partition 'fallback_doms', it also forces the domains * to be rebuilt. * - * If doms_new == NULL it will be replaced with cpu_online_map. + * If doms_new == NULL it will be replaced with cpu_online_mask. * ndoms_new == 0 is a special case for destroying existing domains, * and it will not create the default domain. * * Call with hotplug lock held */ -void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, +/* FIXME: Change to struct cpumask *doms_new[] */ +void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, struct sched_domain_attr *dattr_new) { int i, j, n; @@ -7812,7 +7965,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpus_equal(doms_cur[i], doms_new[j]) + if (cpumask_equal(&doms_cur[i], &doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } @@ -7824,15 +7977,15 @@ match1: if (doms_new == NULL) { ndoms_cur = 0; - doms_new = &fallback_doms; - cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); + doms_new = fallback_doms; + cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpus_equal(doms_new[i], doms_cur[j]) + if (cpumask_equal(&doms_new[i], &doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } @@ -7844,7 +7997,7 @@ match2: } /* Remember the new sched domains */ - if (doms_cur != &fallback_doms) + if (doms_cur != fallback_doms) kfree(doms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; @@ -7873,14 +8026,25 @@ int arch_reinit_sched_domains(void) static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) { int ret; + unsigned int level = 0; + + if (sscanf(buf, "%u", &level) != 1) + return -EINVAL; + + /* + * level is always be positive so don't check for + * level < POWERSAVINGS_BALANCE_NONE which is 0 + * What happens on 0 or 1 byte write, + * need to check for count as well? + */ - if (buf[0] != '0' && buf[0] != '1') + if (level >= MAX_POWERSAVINGS_BALANCE_LEVELS) return -EINVAL; if (smt) - sched_smt_power_savings = (buf[0] == '1'); + sched_smt_power_savings = level; else - sched_mc_power_savings = (buf[0] == '1'); + sched_mc_power_savings = level; ret = arch_reinit_sched_domains(); @@ -7984,7 +8148,9 @@ static int update_runtime(struct notifier_block *nfb, void __init sched_init_smp(void) { - cpumask_t non_isolated_cpus; + cpumask_var_t non_isolated_cpus; + + alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); #if defined(CONFIG_NUMA) sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), @@ -7993,10 +8159,10 @@ void __init sched_init_smp(void) #endif get_online_cpus(); mutex_lock(&sched_domains_mutex); - arch_init_sched_domains(&cpu_online_map); - cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); - if (cpus_empty(non_isolated_cpus)) - cpu_set(smp_processor_id(), non_isolated_cpus); + arch_init_sched_domains(cpu_online_mask); + cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); + if (cpumask_empty(non_isolated_cpus)) + cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); put_online_cpus(); @@ -8011,9 +8177,13 @@ void __init sched_init_smp(void) init_hrtick(); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) BUG(); sched_init_granularity(); + free_cpumask_var(non_isolated_cpus); + + alloc_cpumask_var(&fallback_doms, GFP_KERNEL); + init_sched_rt_class(); } #else void __init sched_init_smp(void) @@ -8328,6 +8498,15 @@ void __init sched_init(void) */ current->sched_class = &fair_sched_class; + /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ + alloc_bootmem_cpumask_var(&nohz_cpu_mask); +#ifdef CONFIG_SMP +#ifdef CONFIG_NO_HZ + alloc_bootmem_cpumask_var(&nohz.cpu_mask); +#endif + alloc_bootmem_cpumask_var(&cpu_isolated_map); +#endif /* SMP */ + scheduler_running = 1; } diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 52154fefab7e..018b7be1db2e 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -67,24 +67,21 @@ static int convert_prio(int prio) * Returns: (int)bool - CPUs were found */ int cpupri_find(struct cpupri *cp, struct task_struct *p, - cpumask_t *lowest_mask) + struct cpumask *lowest_mask) { int idx = 0; int task_pri = convert_prio(p->prio); for_each_cpupri_active(cp->pri_active, idx) { struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; - cpumask_t mask; if (idx >= task_pri) break; - cpus_and(mask, p->cpus_allowed, vec->mask); - - if (cpus_empty(mask)) + if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; - *lowest_mask = mask; + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); return 1; } @@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) vec->count--; if (!vec->count) clear_bit(oldpri, cp->pri_active); - cpu_clear(cpu, vec->mask); + cpumask_clear_cpu(cpu, vec->mask); spin_unlock_irqrestore(&vec->lock, flags); } @@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) spin_lock_irqsave(&vec->lock, flags); - cpu_set(cpu, vec->mask); + cpumask_set_cpu(cpu, vec->mask); vec->count++; if (vec->count == 1) set_bit(newpri, cp->pri_active); @@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) /** * cpupri_init - initialize the cpupri structure * @cp: The cpupri context + * @bootmem: true if allocations need to use bootmem * - * Returns: (void) + * Returns: -ENOMEM if memory fails. */ -void cpupri_init(struct cpupri *cp) +int cpupri_init(struct cpupri *cp, bool bootmem) { int i; @@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp) spin_lock_init(&vec->lock); vec->count = 0; - cpus_clear(vec->mask); + if (bootmem) + alloc_bootmem_cpumask_var(&vec->mask); + else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL)) + goto cleanup; } for_each_possible_cpu(i) cp->cpu_to_pri[i] = CPUPRI_INVALID; + return 0; + +cleanup: + for (i--; i >= 0; i--) + free_cpumask_var(cp->pri_to_cpu[i].mask); + return -ENOMEM; } +/** + * cpupri_cleanup - clean up the cpupri structure + * @cp: The cpupri context + */ +void cpupri_cleanup(struct cpupri *cp) +{ + int i; + for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) + free_cpumask_var(cp->pri_to_cpu[i].mask); +} diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h index f25811b0f931..642a94ef8a0a 100644 --- a/kernel/sched_cpupri.h +++ b/kernel/sched_cpupri.h @@ -14,7 +14,7 @@ struct cpupri_vec { spinlock_t lock; int count; - cpumask_t mask; + cpumask_var_t mask; }; struct cpupri { @@ -27,7 +27,8 @@ struct cpupri { int cpupri_find(struct cpupri *cp, struct task_struct *p, cpumask_t *lowest_mask); void cpupri_set(struct cpupri *cp, int cpu, int pri); -void cpupri_init(struct cpupri *cp); +int cpupri_init(struct cpupri *cp, bool bootmem); +void cpupri_cleanup(struct cpupri *cp); #else #define cpupri_set(cp, cpu, pri) do { } while (0) #define cpupri_init() do { } while (0) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5ad4440f0fc4..56c0efe902a7 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1019,16 +1019,33 @@ static void yield_task_fair(struct rq *rq) * search starts with cpus closest then further out as needed, * so we always favor a closer, idle cpu. * Domains may include CPUs that are not usable for migration, - * hence we need to mask them out (cpu_active_map) + * hence we need to mask them out (cpu_active_mask) * * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) static int wake_idle(int cpu, struct task_struct *p) { - cpumask_t tmp; struct sched_domain *sd; int i; + unsigned int chosen_wakeup_cpu; + int this_cpu; + + /* + * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu + * are idle and this is not a kernel thread and this task's affinity + * allows it to be moved to preferred cpu, then just move! + */ + + this_cpu = smp_processor_id(); + chosen_wakeup_cpu = + cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; + + if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && + idle_cpu(cpu) && idle_cpu(this_cpu) && + p->mm && !(p->flags & PF_KTHREAD) && + cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) + return chosen_wakeup_cpu; /* * If it is idle, then it is the best cpu to run this task. @@ -1046,10 +1063,9 @@ static int wake_idle(int cpu, struct task_struct *p) if ((sd->flags & SD_WAKE_IDLE) || ((sd->flags & SD_WAKE_IDLE_FAR) && !task_hot(p, task_rq(p)->clock, sd))) { - cpus_and(tmp, sd->span, p->cpus_allowed); - cpus_and(tmp, tmp, cpu_active_map); - for_each_cpu_mask_nr(i, tmp) { - if (idle_cpu(i)) { + for_each_cpu_and(i, sched_domain_span(sd), + &p->cpus_allowed) { + if (cpu_active(i) && idle_cpu(i)) { if (i != task_cpu(p)) { schedstat_inc(p, se.nr_wakeups_idle); @@ -1242,13 +1258,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync) * this_cpu and prev_cpu are present in: */ for_each_domain(this_cpu, sd) { - if (cpu_isset(prev_cpu, sd->span)) { + if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { this_sd = sd; break; } } - if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) + if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) goto out; /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 51d2af3e6191..833b6d44483c 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq) if (!rq->online) return; - cpu_set(rq->cpu, rq->rd->rto_mask); + cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); /* * Make sure the mask is visible before we set * the overload count. That is checked to determine @@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq) /* the order here really doesn't matter */ atomic_dec(&rq->rd->rto_count); - cpu_clear(rq->cpu, rq->rd->rto_mask); + cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); } static void update_rt_migration(struct rq *rq) @@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) } #ifdef CONFIG_SMP -static inline cpumask_t sched_rt_period_mask(void) +static inline const struct cpumask *sched_rt_period_mask(void) { return cpu_rq(smp_processor_id())->rd->span; } #else -static inline cpumask_t sched_rt_period_mask(void) +static inline const struct cpumask *sched_rt_period_mask(void) { - return cpu_online_map; + return cpu_online_mask; } #endif @@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) return rt_rq->rt_throttled; } -static inline cpumask_t sched_rt_period_mask(void) +static inline const struct cpumask *sched_rt_period_mask(void) { - return cpu_online_map; + return cpu_online_mask; } static inline @@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq) int i, weight, more = 0; u64 rt_period; - weight = cpus_weight(rd->span); + weight = cpumask_weight(rd->span); spin_lock(&rt_b->rt_runtime_lock); rt_period = ktime_to_ns(rt_b->rt_period); - for_each_cpu_mask_nr(i, rd->span) { + for_each_cpu(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq) /* * Greedy reclaim, take back as much as we can. */ - for_each_cpu_mask(i, rd->span) { + for_each_cpu(i, rd->span) { struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); s64 diff; @@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq) static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; - cpumask_t span; + const struct cpumask *span; if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return 1; span = sched_rt_period_mask(); - for_each_cpu_mask(i, span) { + for_each_cpu(i, span) { int enqueue = 0; struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); struct rq *rq = rq_of_rt_rq(rt_rq); @@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_t mask; + cpumask_var_t mask; if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, &mask)) + if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) - return; + if (p->rt.nr_cpus_allowed != 1 + && cpupri_find(&rq->rd->cpupri, p, mask)) + goto free; + + if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) + goto free; /* * There appears to be other cpus that can accept @@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); +free: + free_cpumask_var(mask); } #endif /* CONFIG_SMP */ @@ -914,7 +919,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && + (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) && (p->rt.nr_cpus_allowed > 1)) return 1; return 0; @@ -953,7 +958,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) return next; } -static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) { @@ -973,7 +978,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); @@ -988,7 +993,7 @@ static int find_lowest_rq(struct task_struct *task) * I guess we might want to change cpupri_find() to ignore those * in the first place. */ - cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); + cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); /* * At this point we have built a mask of cpus representing the @@ -998,7 +1003,7 @@ static int find_lowest_rq(struct task_struct *task) * We prioritize the last cpu that the task executed on since * it is most likely cache-hot in that location. */ - if (cpu_isset(cpu, *lowest_mask)) + if (cpumask_test_cpu(cpu, lowest_mask)) return cpu; /* @@ -1013,7 +1018,8 @@ static int find_lowest_rq(struct task_struct *task) cpumask_t domain_mask; int best_cpu; - cpus_and(domain_mask, sd->span, *lowest_mask); + cpumask_and(&domain_mask, sched_domain_span(sd), + lowest_mask); best_cpu = pick_optimal_cpu(this_cpu, &domain_mask); @@ -1054,8 +1060,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * Also make sure that it wasn't scheduled on its rq. */ if (unlikely(task_rq(task) != rq || - !cpu_isset(lowest_rq->cpu, - task->cpus_allowed) || + !cpumask_test_cpu(lowest_rq->cpu, + &task->cpus_allowed) || task_running(rq, task) || !task->se.on_rq)) { @@ -1176,7 +1182,7 @@ static int pull_rt_task(struct rq *this_rq) next = pick_next_task_rt(this_rq); - for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { + for_each_cpu(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; @@ -1305,9 +1311,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, } static void set_cpus_allowed_rt(struct task_struct *p, - const cpumask_t *new_mask) + const struct cpumask *new_mask) { - int weight = cpus_weight(*new_mask); + int weight = cpumask_weight(new_mask); BUG_ON(!rt_task(p)); @@ -1328,7 +1334,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, update_rt_migration(rq); } - p->cpus_allowed = *new_mask; + cpumask_copy(&p->cpus_allowed, new_mask); p->rt.nr_cpus_allowed = weight; } @@ -1371,6 +1377,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p, if (!rq->rt.rt_nr_running) pull_rt_task(rq); } + +static inline void init_sched_rt_class(void) +{ + unsigned int i; + + for_each_possible_cpu(i) + alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL); +} #endif /* CONFIG_SMP */ /* @@ -1541,3 +1555,4 @@ static void print_rt_stats(struct seq_file *m, int cpu) rcu_read_unlock(); } #endif /* CONFIG_SCHED_DEBUG */ + diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 3b01098164c8..f2773b5d1226 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, sd->span); + cpumask_scnprintf(mask_str, mask_len, + sched_domain_span(sd)); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/kernel/softirq.c b/kernel/softirq.c index 466e75ce271a..670c1eca47ec 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -784,3 +784,23 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait) } EXPORT_SYMBOL(on_each_cpu); #endif + +/* + * [ These __weak aliases are kept in a separate compilation unit, so that + * GCC does not inline them incorrectly. ] + */ + +int __init __weak early_irq_init(void) +{ + return 0; +} + +int __init __weak arch_early_irq_init(void) +{ + return 0; +} + +int __weak arch_init_chip_data(struct irq_desc *desc, int cpu) +{ + return 0; +} diff --git a/kernel/taskstats.c b/kernel/taskstats.c index bd6be76303cf..6d7dc4ec4aa5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask) if (!data) return -ENOMEM; nla_strlcpy(data, na, len); - ret = cpulist_parse(data, *mask); + ret = cpulist_parse(data, mask); kfree(data); return ret; } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index f8d968063cea..ea2f48af83cf 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -166,6 +166,8 @@ static void clockevents_notify_released(void) void clockevents_register_device(struct clock_event_device *dev) { BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(!dev->cpumask); + /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f98a1b7b16e9..9590af2327be 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -150,7 +150,7 @@ static void tick_do_broadcast(cpumask_t mask) */ cpu = first_cpu(mask); td = &per_cpu(tick_cpu_device, cpu); - td->evtdev->broadcast(mask); + td->evtdev->broadcast(&mask); } } diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index df12434b43ca..f8372be74122 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) */ static void tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, - const cpumask_t *cpumask) + const struct cpumask *cpumask) { ktime_t next_event; void (*handler)(struct clock_event_device *) = NULL; @@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td, * When the device is not per cpu, pin the interrupt to the * current cpu: */ - if (!cpus_equal(newdev->cpumask, *cpumask)) - irq_set_affinity(newdev->irq, *cpumask); + if (!cpumask_equal(newdev->cpumask, cpumask)) + irq_set_affinity(newdev->irq, cpumask); /* * When global broadcasting is active, check if the current @@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev) spin_lock_irqsave(&tick_device_lock, flags); cpu = smp_processor_id(); - if (!cpu_isset(cpu, newdev->cpumask)) + if (!cpumask_test_cpu(cpu, newdev->cpumask)) goto out_bc; td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; /* cpu local device ? */ - if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) { + if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) { /* * If the cpu affinity of the device interrupt can not @@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) * If we have a cpu local device already, do not replace it * by a non cpu local device */ - if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu))) + if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) goto out_bc; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8f3fc2582d38..1b6c05bd0d0a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void) if (!ts->tick_stopped) return; - cpu_clear(cpu, nohz_cpu_mask); + cpumask_clear_cpu(cpu, nohz_cpu_mask); now = ktime_get(); ts->idle_waketime = now; @@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle) tick_do_timer_cpu = TICK_DO_TIMER_NONE; if (delta_jiffies > 1) - cpu_set(cpu, nohz_cpu_mask); + cpumask_set_cpu(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) @@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle) /* * sched tick not stopped! */ - cpu_clear(cpu, nohz_cpu_mask); + cpumask_clear_cpu(cpu, nohz_cpu_mask); goto out; } @@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle) * softirq. */ tick_do_update_jiffies64(ktime_get()); - cpu_clear(cpu, nohz_cpu_mask); + cpumask_clear_cpu(cpu, nohz_cpu_mask); } raise_softirq_irqoff(TIMER_SOFTIRQ); out: @@ -419,7 +419,9 @@ void tick_nohz_restart_sched_tick(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +#ifndef CONFIG_VIRT_CPU_ACCOUNTING unsigned long ticks; +#endif ktime_t now; local_irq_disable(); @@ -439,8 +441,9 @@ void tick_nohz_restart_sched_tick(void) select_nohz_load_balancer(0); now = ktime_get(); tick_do_update_jiffies64(now); - cpu_clear(cpu, nohz_cpu_mask); + cpumask_clear_cpu(cpu, nohz_cpu_mask); +#ifndef CONFIG_VIRT_CPU_ACCOUNTING /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -450,12 +453,9 @@ void tick_nohz_restart_sched_tick(void) /* * We might be one off. Do not randomly account a huge number of ticks! */ - if (ticks && ticks < LONG_MAX) { - add_preempt_count(HARDIRQ_OFFSET); - account_system_time(current, HARDIRQ_OFFSET, - jiffies_to_cputime(ticks)); - sub_preempt_count(HARDIRQ_OFFSET); - } + if (ticks && ticks < LONG_MAX) + account_idle_ticks(ticks); +#endif touch_softlockup_watchdog(); /* diff --git a/kernel/timer.c b/kernel/timer.c index 566257d1dc10..dee3f641a7a7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1018,21 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now) } #endif -#ifndef CONFIG_VIRT_CPU_ACCOUNTING -void account_process_tick(struct task_struct *p, int user_tick) -{ - cputime_t one_jiffy = jiffies_to_cputime(1); - - if (user_tick) { - account_user_time(p, one_jiffy); - account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); - } else { - account_system_time(p, HARDIRQ_OFFSET, one_jiffy); - account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); - } -} -#endif - /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4185d5221633..0e91f43b6baf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2674,7 +2674,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2695,7 +2695,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; diff --git a/lib/Kconfig b/lib/Kconfig index fd4118e097f0..2ba43c4a5b07 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -159,4 +159,11 @@ config CHECK_SIGNATURE config HAVE_LMB boolean +config CPUMASK_OFFSTACK + bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS + help + Use dynamic allocation for cpumask_var_t, instead of putting + them on the stack. This is a bit more expensive, but avoids + stack overflow. + endmenu diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 3b777025d876..98d632277ca8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -661,6 +661,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, */ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) { + if (!ptr) + return string(buf, end, "(null)", field_width, precision, flags); + switch (*fmt) { case 'F': ptr = dereference_function_descriptor(ptr); diff --git a/mm/slub.c b/mm/slub.c index 6cb7ad107852..0d861c3154b6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3642,7 +3642,7 @@ static int list_locations(struct kmem_cache *s, char *buf, len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->cpus); + &l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index e4e2caeb9d82..086d5ef098fd 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -371,9 +371,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) IRDA_DEBUG(2, "%s()\n", __func__ ); line = tty->index; - if ((line < 0) || (line >= IRCOMM_TTY_PORTS)) { + if (line >= IRCOMM_TTY_PORTS) return -ENODEV; - } /* Check if instance already exists */ self = hashbin_lock_find(ircomm_tty, line, NULL); @@ -405,6 +404,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) * Force TTY into raw mode by default which is usually what * we want for IrCOMM and IrLPT. This way applications will * not have to twiddle with printcap etc. + * + * Note this is completely usafe and doesn't work properly */ tty->termios->c_iflag = 0; tty->termios->c_oflag = 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c6250d0055d2..d1b89820ab4f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -836,7 +836,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; + err = security_path_mknod(&nd.path, dentry, mode, 0); + if (err) + goto out_mknod_drop_write; err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); +out_mknod_drop_write: mnt_drop_write(nd.path.mnt); if (err) goto out_mknod_dput; diff --git a/scripts/headers_check.pl b/scripts/headers_check.pl index 488a3b1f760f..db30fac3083e 100644 --- a/scripts/headers_check.pl +++ b/scripts/headers_check.pl @@ -14,7 +14,9 @@ # Only include files located in asm* and linux* are checked. # The rest are assumed to be system include files. # -# 2) TODO: check for leaked CONFIG_ symbols +# 2) It is checked that prototypes does not use "extern" +# +# 3) Check for leaked CONFIG_ symbols use strict; @@ -32,7 +34,11 @@ foreach my $file (@files) { $lineno = 0; while ($line = <FH>) { $lineno++; - check_include(); + &check_include(); + &check_asm_types(); + &check_sizetypes(); + &check_prototypes(); + &check_config(); } close FH; } @@ -54,3 +60,63 @@ sub check_include } } } + +sub check_prototypes +{ + if ($line =~ m/^\s*extern\b/) { + printf STDERR "$filename:$lineno: extern's make no sense in userspace\n"; + } +} + +sub check_config +{ + if ($line =~ m/[^a-zA-Z0-9_]+CONFIG_([a-zA-Z0-9]+)[^a-zA-Z0-9]/) { + printf STDERR "$filename:$lineno: leaks CONFIG_$1 to userspace where it is not valid\n"; + } +} + +my $linux_asm_types; +sub check_asm_types() +{ + if ($filename =~ /types.h|int-l64.h|int-ll64.h/o) { + return; + } + if ($lineno == 1) { + $linux_asm_types = 0; + } elsif ($linux_asm_types >= 1) { + return; + } + if ($line =~ m/^\s*#\s*include\s+<asm\/types.h>/) { + $linux_asm_types = 1; + printf STDERR "$filename:$lineno: " . + "include of <linux/types.h> is preferred over <asm/types.h>\n" + # Warn until headers are all fixed + #$ret = 1; + } +} + +my $linux_types; +sub check_sizetypes +{ + if ($filename =~ /types.h|int-l64.h|int-ll64.h/o) { + return; + } + if ($lineno == 1) { + $linux_types = 0; + } elsif ($linux_types >= 1) { + return; + } + if ($line =~ m/^\s*#\s*include\s+<linux\/types.h>/) { + $linux_types = 1; + return; + } + if ($line =~ m/__[us](8|16|32|64)\b/) { + printf STDERR "$filename:$lineno: " . + "found __[us]{8,16,32,64} type " . + "without #include <linux/types.h>\n"; + $linux_types = 2; + # Warn until headers are all fixed + #$ret = 1; + } +} + diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl index 7d2b4146e02f..c6ae4052ab43 100644 --- a/scripts/headers_install.pl +++ b/scripts/headers_install.pl @@ -36,6 +36,9 @@ foreach my $file (@files) { $line =~ s/\s__attribute_const__\s/ /g; $line =~ s/\s__attribute_const__$//g; $line =~ s/^#include <linux\/compiler.h>//; + $line =~ s/(^|\s)(inline)\b/$1__$2__/g; + $line =~ s/(^|\s)(asm)\b(\s|[(]|$)/$1__$2__$3/g; + $line =~ s/(^|\s|[(])(volatile)\b(\s|[(]|$)/$1__$2__$3/g; printf OUTFILE "%s", $line; } close OUTFILE; diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index 9d4cba1c001d..6408fefae083 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -65,9 +65,13 @@ enum symbol_type { S_UNKNOWN, S_BOOLEAN, S_TRISTATE, S_INT, S_HEX, S_STRING, S_OTHER }; +/* enum values are used as index to symbol.def[] */ enum { S_DEF_USER, /* main user value */ - S_DEF_AUTO, + S_DEF_AUTO, /* values read from auto.conf */ + S_DEF_DEF3, /* Reserved for UI usage */ + S_DEF_DEF4, /* Reserved for UI usage */ + S_DEF_COUNT }; struct symbol { @@ -75,7 +79,7 @@ struct symbol { char *name; enum symbol_type type; struct symbol_value curr; - struct symbol_value def[4]; + struct symbol_value def[S_DEF_COUNT]; tristate visible; int flags; struct property *prop; @@ -84,42 +88,64 @@ struct symbol { #define for_all_symbols(i, sym) for (i = 0; i < 257; i++) for (sym = symbol_hash[i]; sym; sym = sym->next) if (sym->type != S_OTHER) -#define SYMBOL_CONST 0x0001 -#define SYMBOL_CHECK 0x0008 -#define SYMBOL_CHOICE 0x0010 -#define SYMBOL_CHOICEVAL 0x0020 -#define SYMBOL_VALID 0x0080 -#define SYMBOL_OPTIONAL 0x0100 -#define SYMBOL_WRITE 0x0200 -#define SYMBOL_CHANGED 0x0400 -#define SYMBOL_AUTO 0x1000 -#define SYMBOL_CHECKED 0x2000 -#define SYMBOL_WARNED 0x8000 -#define SYMBOL_DEF 0x10000 -#define SYMBOL_DEF_USER 0x10000 -#define SYMBOL_DEF_AUTO 0x20000 -#define SYMBOL_DEF3 0x40000 -#define SYMBOL_DEF4 0x80000 +#define SYMBOL_CONST 0x0001 /* symbol is const */ +#define SYMBOL_CHECK 0x0008 /* used during dependency checking */ +#define SYMBOL_CHOICE 0x0010 /* start of a choice block (null name) */ +#define SYMBOL_CHOICEVAL 0x0020 /* used as a value in a choice block */ +#define SYMBOL_VALID 0x0080 /* set when symbol.curr is calculated */ +#define SYMBOL_OPTIONAL 0x0100 /* choice is optional - values can be 'n' */ +#define SYMBOL_WRITE 0x0200 /* ? */ +#define SYMBOL_CHANGED 0x0400 /* ? */ +#define SYMBOL_AUTO 0x1000 /* value from environment variable */ +#define SYMBOL_CHECKED 0x2000 /* used during dependency checking */ +#define SYMBOL_WARNED 0x8000 /* warning has been issued */ + +/* Set when symbol.def[] is used */ +#define SYMBOL_DEF 0x10000 /* First bit of SYMBOL_DEF */ +#define SYMBOL_DEF_USER 0x10000 /* symbol.def[S_DEF_USER] is valid */ +#define SYMBOL_DEF_AUTO 0x20000 /* symbol.def[S_DEF_AUTO] is valid */ +#define SYMBOL_DEF3 0x40000 /* symbol.def[S_DEF_3] is valid */ +#define SYMBOL_DEF4 0x80000 /* symbol.def[S_DEF_4] is valid */ #define SYMBOL_MAXLENGTH 256 #define SYMBOL_HASHSIZE 257 #define SYMBOL_HASHMASK 0xff +/* A property represent the config options that can be associated + * with a config "symbol". + * Sample: + * config FOO + * default y + * prompt "foo prompt" + * select BAR + * config BAZ + * int "BAZ Value" + * range 1..255 + */ enum prop_type { - P_UNKNOWN, P_PROMPT, P_COMMENT, P_MENU, P_DEFAULT, P_CHOICE, - P_SELECT, P_RANGE, P_ENV + P_UNKNOWN, + P_PROMPT, /* prompt "foo prompt" or "BAZ Value" */ + P_COMMENT, /* text associated with a comment */ + P_MENU, /* prompt associated with a menuconfig option */ + P_DEFAULT, /* default y */ + P_CHOICE, /* choice value */ + P_SELECT, /* select BAR */ + P_RANGE, /* range 7..100 (for a symbol) */ + P_ENV, /* value from environment variable */ }; struct property { - struct property *next; - struct symbol *sym; - enum prop_type type; - const char *text; + struct property *next; /* next property - null if last */ + struct symbol *sym; /* the symbol for which the property is associated */ + enum prop_type type; /* type of property */ + const char *text; /* the prompt value - P_PROMPT, P_MENU, P_COMMENT */ struct expr_value visible; - struct expr *expr; - struct menu *menu; - struct file *file; - int lineno; + struct expr *expr; /* the optional conditional part of the property */ + struct menu *menu; /* the menu the property are associated with + * valid for: P_SELECT, P_RANGE, P_CHOICE, + * P_PROMPT, P_DEFAULT, P_MENU, P_COMMENT */ + struct file *file; /* what file was this property defined */ + int lineno; /* what lineno was this property defined */ }; #define for_all_properties(sym, st, tok) \ diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped index 7342ce0a7780..dc3e81807d13 100644 --- a/scripts/kconfig/lex.zconf.c_shipped +++ b/scripts/kconfig/lex.zconf.c_shipped @@ -2370,11 +2370,14 @@ void zconf_nextfile(const char *name) current_buf = buf; if (file->flags & FILE_BUSY) { - printf("recursive scan (%s)?\n", name); + printf("%s:%d: do not source '%s' from itself\n", + zconf_curname(), zconf_lineno(), name); exit(1); } if (file->flags & FILE_SCANNED) { - printf("file %s already scanned?\n", name); + printf("%s:%d: file '%s' is already sourced from '%s'\n", + zconf_curname(), zconf_lineno(), name, + file->parent->name); exit(1); } file->flags |= FILE_BUSY; diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 5164ef7ce499..21ff69c9ad4e 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -314,11 +314,14 @@ void zconf_nextfile(const char *name) current_buf = buf; if (file->flags & FILE_BUSY) { - printf("recursive scan (%s)?\n", name); + printf("%s:%d: do not source '%s' from itself\n", + zconf_curname(), zconf_lineno(), name); exit(1); } if (file->flags & FILE_SCANNED) { - printf("file %s already scanned?\n", name); + printf("%s:%d: file '%s' is already sourced from '%s'\n", + zconf_curname(), zconf_lineno(), name, + file->parent->name); exit(1); } file->flags |= FILE_BUSY; diff --git a/scripts/tags.sh b/scripts/tags.sh index 4e7547209852..9e3451d2c3a1 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -84,7 +84,6 @@ docscope() exuberant() { - all_sources > all all_sources | xargs $1 -a \ -I __initdata,__exitdata,__acquires,__releases \ -I __read_mostly,____cacheline_aligned \ diff --git a/security/Kconfig b/security/Kconfig index d9f47ce7e207..9438535d7fd0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -81,6 +81,15 @@ config SECURITY_NETWORK_XFRM IPSec. If you are unsure how to answer this question, answer N. +config SECURITY_PATH + bool "Security hooks for pathname based access control" + depends on SECURITY + help + This enables the security hooks for pathname based access control. + If enabled, a security module can use these hooks to + implement pathname based access controls. + If you are unsure how to answer this question, answer N. + config SECURITY_FILE_CAPABILITIES bool "File POSIX Capabilities" default n diff --git a/security/capability.c b/security/capability.c index 2dce66fcb992..c545bd1300b5 100644 --- a/security/capability.c +++ b/security/capability.c @@ -263,6 +263,53 @@ static void cap_inode_getsecid(const struct inode *inode, u32 *secid) *secid = 0; } +#ifdef CONFIG_SECURITY_PATH +static int cap_path_mknod(struct path *dir, struct dentry *dentry, int mode, + unsigned int dev) +{ + return 0; +} + +static int cap_path_mkdir(struct path *dir, struct dentry *dentry, int mode) +{ + return 0; +} + +static int cap_path_rmdir(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static int cap_path_unlink(struct path *dir, struct dentry *dentry) +{ + return 0; +} + +static int cap_path_symlink(struct path *dir, struct dentry *dentry, + const char *old_name) +{ + return 0; +} + +static int cap_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry) +{ + return 0; +} + +static int cap_path_rename(struct path *old_path, struct dentry *old_dentry, + struct path *new_path, struct dentry *new_dentry) +{ + return 0; +} + +static int cap_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs) +{ + return 0; +} +#endif + static int cap_file_permission(struct file *file, int mask) { return 0; @@ -883,6 +930,16 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, inode_setsecurity); set_to_cap_if_null(ops, inode_listsecurity); set_to_cap_if_null(ops, inode_getsecid); +#ifdef CONFIG_SECURITY_PATH + set_to_cap_if_null(ops, path_mknod); + set_to_cap_if_null(ops, path_mkdir); + set_to_cap_if_null(ops, path_rmdir); + set_to_cap_if_null(ops, path_unlink); + set_to_cap_if_null(ops, path_symlink); + set_to_cap_if_null(ops, path_link); + set_to_cap_if_null(ops, path_rename); + set_to_cap_if_null(ops, path_truncate); +#endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); diff --git a/security/security.c b/security/security.c index d85dbb37c972..678d4d07b852 100644 --- a/security/security.c +++ b/security/security.c @@ -355,6 +355,72 @@ int security_inode_init_security(struct inode *inode, struct inode *dir, } EXPORT_SYMBOL(security_inode_init_security); +#ifdef CONFIG_SECURITY_PATH +int security_path_mknod(struct path *path, struct dentry *dentry, int mode, + unsigned int dev) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_mknod(path, dentry, mode, dev); +} +EXPORT_SYMBOL(security_path_mknod); + +int security_path_mkdir(struct path *path, struct dentry *dentry, int mode) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_mkdir(path, dentry, mode); +} + +int security_path_rmdir(struct path *path, struct dentry *dentry) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_rmdir(path, dentry); +} + +int security_path_unlink(struct path *path, struct dentry *dentry) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_unlink(path, dentry); +} + +int security_path_symlink(struct path *path, struct dentry *dentry, + const char *old_name) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_symlink(path, dentry, old_name); +} + +int security_path_link(struct dentry *old_dentry, struct path *new_dir, + struct dentry *new_dentry) +{ + if (unlikely(IS_PRIVATE(old_dentry->d_inode))) + return 0; + return security_ops->path_link(old_dentry, new_dir, new_dentry); +} + +int security_path_rename(struct path *old_dir, struct dentry *old_dentry, + struct path *new_dir, struct dentry *new_dentry) +{ + if (unlikely(IS_PRIVATE(old_dentry->d_inode) || + (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) + return 0; + return security_ops->path_rename(old_dir, old_dentry, new_dir, + new_dentry); +} + +int security_path_truncate(struct path *path, loff_t length, + unsigned int time_attrs) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_truncate(path, length, time_attrs); +} +#endif + int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) { if (unlikely(IS_PRIVATE(dir))) diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 53772bb46320..23b81cf242af 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -150,10 +150,11 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic, static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) { kvm_inject_nmi(vcpu); + kvm_vcpu_kick(vcpu); } -static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode) +u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode) { u32 mask = 0; int i; @@ -207,7 +208,8 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) "vector=%x trig_mode=%x\n", dest, dest_mode, delivery_mode, vector, trig_mode); - deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, + dest_mode); if (!deliver_bitmask) { ioapic_debug("no target on destination\n"); return 0; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index cd7ae7691c9d..49c9581d2586 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -85,5 +85,7 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); +u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode); #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 55ad76ee2d09..aa5d1e5c497e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -61,10 +61,9 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); } -void kvm_unregister_irq_ack_notifier(struct kvm *kvm, - struct kvm_irq_ack_notifier *kian) +void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian) { - hlist_del(&kian->link); + hlist_del_init(&kian->link); } /* The caller must hold kvm->lock mutex */ @@ -73,11 +72,15 @@ int kvm_request_irq_source_id(struct kvm *kvm) unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; int irq_source_id = find_first_zero_bit(bitmap, sizeof(kvm->arch.irq_sources_bitmap)); + if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); - irq_source_id = -EFAULT; - } else - set_bit(irq_source_id, bitmap); + return -EFAULT; + } + + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); + set_bit(irq_source_id, bitmap); + return irq_source_id; } @@ -85,7 +88,9 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) { int i; - if (irq_source_id <= 0 || + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); + + if (irq_source_id < 0 || irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a87f45edfae8..fc6127cbea1f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,6 +47,10 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> +#ifdef CONFIG_X86 +#include <asm/msidef.h> +#endif + #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET #include "coalesced_mmio.h" #endif @@ -60,10 +64,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +static int msi2intx = 1; +module_param(msi2intx, bool, 0); + DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); -static cpumask_t cpus_hardware_enabled; +static cpumask_var_t cpus_hardware_enabled; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -75,9 +82,60 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); -bool kvm_rebooting; +static bool kvm_rebooting; #ifdef KVM_CAP_DEVICE_ASSIGNMENT + +#ifdef CONFIG_X86 +static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) +{ + int vcpu_id; + struct kvm_vcpu *vcpu; + struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm); + int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK) + >> MSI_ADDR_DEST_ID_SHIFT; + int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK) + >> MSI_DATA_VECTOR_SHIFT; + int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, + (unsigned long *)&dev->guest_msi.address_lo); + int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, + (unsigned long *)&dev->guest_msi.data); + int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, + (unsigned long *)&dev->guest_msi.data); + u32 deliver_bitmask; + + BUG_ON(!ioapic); + + deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, + dest_id, dest_mode); + /* IOAPIC delivery mode value is the same as MSI here */ + switch (delivery_mode) { + case IOAPIC_LOWEST_PRIORITY: + vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, + deliver_bitmask); + if (vcpu != NULL) + kvm_apic_set_irq(vcpu, vector, trig_mode); + else + printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); + break; + case IOAPIC_FIXED: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) + kvm_apic_set_irq(vcpu, vector, trig_mode); + } + break; + default: + printk(KERN_INFO "kvm: unsupported MSI delivery mode\n"); + } +} +#else +static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {} +#endif + static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) { @@ -104,9 +162,16 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) * finer-grained lock, update this */ mutex_lock(&assigned_dev->kvm->lock); - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX) + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + else if (assigned_dev->irq_requested_type & + KVM_ASSIGNED_DEV_GUEST_MSI) { + assigned_device_msi_dispatch(assigned_dev); + enable_irq(assigned_dev->host_irq); + assigned_dev->host_irq_disabled = false; + } mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); } @@ -117,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) (struct kvm_assigned_dev_kernel *) dev_id; kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + return IRQ_HANDLED; } @@ -132,19 +201,32 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - enable_irq(dev->host_irq); + + /* The guest irq may be shared so this ack may be + * from another device. + */ + if (dev->host_irq_disabled) { + enable_irq(dev->host_irq); + dev->host_irq_disabled = false; + } } -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) { - if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) - free_irq(assigned_dev->host_irq, (void *)assigned_dev); + if (!irqchip_in_kernel(kvm)) + return; + + kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + if (assigned_dev->irq_source_id != -1) + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + assigned_dev->irq_source_id = -1; + + if (!assigned_dev->irq_requested_type) + return; if (cancel_work_sync(&assigned_dev->interrupt_work)) /* We had pending work. That means we will have to take @@ -152,6 +234,23 @@ static void kvm_free_assigned_device(struct kvm *kvm, */ kvm_put_kvm(kvm); + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + + assigned_dev->irq_requested_type = 0; +} + + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + kvm_free_assigned_irq(kvm, assigned_dev); + + pci_reset_function(assigned_dev->dev); + pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -174,6 +273,95 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) } } +static int assigned_device_update_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *adev, + struct kvm_assigned_irq *airq) +{ + adev->guest_irq = airq->guest_irq; + adev->ack_notifier.gsi = airq->guest_irq; + + if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) + return 0; + + if (irqchip_in_kernel(kvm)) { + if (!msi2intx && + adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) { + free_irq(adev->host_irq, (void *)kvm); + pci_disable_msi(adev->dev); + } + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (airq->host_irq) + adev->host_irq = airq->host_irq; + else + adev->host_irq = adev->dev->irq; + + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(adev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)adev)) + return -EIO; + } + + adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | + KVM_ASSIGNED_DEV_HOST_INTX; + return 0; +} + +#ifdef CONFIG_X86 +static int assigned_device_update_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *adev, + struct kvm_assigned_irq *airq) +{ + int r; + + if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { + /* x86 don't care upper address of guest msi message addr */ + adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; + adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; + adev->guest_msi.address_lo = airq->guest_msi.addr_lo; + adev->guest_msi.data = airq->guest_msi.data; + adev->ack_notifier.gsi = -1; + } else if (msi2intx) { + adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; + adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; + adev->guest_irq = airq->guest_irq; + adev->ack_notifier.gsi = airq->guest_irq; + } + + if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) + return 0; + + if (irqchip_in_kernel(kvm)) { + if (!msi2intx) { + if (adev->irq_requested_type & + KVM_ASSIGNED_DEV_HOST_INTX) + free_irq(adev->host_irq, (void *)adev); + + r = pci_enable_msi(adev->dev); + if (r) + return r; + } + + adev->host_irq = adev->dev->irq; + if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)adev)) + return -EIO; + } + + if (!msi2intx) + adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; + + adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; + return 0; +} +#endif + static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, struct kvm_assigned_irq *assigned_irq) @@ -190,49 +378,68 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, return -EINVAL; } - if (match->irq_requested) { - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - mutex_unlock(&kvm->lock); - return 0; - } + if (!match->irq_requested_type) { + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + if (irqchip_in_kernel(kvm)) { + /* Register ack nofitier */ + match->ack_notifier.gsi = -1; + match->ack_notifier.irq_acked = + kvm_assigned_dev_ack_irq; + kvm_register_irq_ack_notifier(kvm, + &match->ack_notifier); + + /* Request IRQ source ID */ + r = kvm_request_irq_source_id(kvm); + if (r < 0) + goto out_release; + else + match->irq_source_id = r; - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); +#ifdef CONFIG_X86 + /* Determine host device irq type, we can know the + * result from dev->msi_enabled */ + if (msi2intx) + pci_enable_msi(match->dev); +#endif + } + } - if (irqchip_in_kernel(kvm)) { - if (!capable(CAP_SYS_RAWIO)) { - r = -EPERM; + if ((!msi2intx && + (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) || + (msi2intx && match->dev->msi_enabled)) { +#ifdef CONFIG_X86 + r = assigned_device_update_msi(kvm, match, assigned_irq); + if (r) { + printk(KERN_WARNING "kvm: failed to enable " + "MSI device!\n"); goto out_release; } - - if (assigned_irq->host_irq) - match->host_irq = assigned_irq->host_irq; - else - match->host_irq = match->dev->irq; - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); - r = kvm_request_irq_source_id(kvm); - if (r < 0) +#else + r = -ENOTTY; +#endif + } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { + /* Host device IRQ 0 means don't support INTx */ + if (!msi2intx) { + printk(KERN_WARNING + "kvm: wait device to enable MSI!\n"); + r = 0; + } else { + printk(KERN_WARNING + "kvm: failed to enable MSI device!\n"); + r = -ENOTTY; goto out_release; - else - match->irq_source_id = r; - - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_device", (void *)match)) { - r = -EIO; + } + } else { + /* Non-sharing INTx mode */ + r = assigned_device_update_intx(kvm, match, assigned_irq); + if (r) { + printk(KERN_WARNING "kvm: failed to enable " + "INTx device!\n"); goto out_release; } } - match->irq_requested = true; mutex_unlock(&kvm->lock); return r; out_release: @@ -283,11 +490,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, __func__); goto out_disable; } + + pci_reset_function(dev); + match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_busnr = assigned_dev->busnr; match->host_devfn = assigned_dev->devfn; match->dev = dev; - + match->irq_source_id = -1; match->kvm = kvm; list_add(&match->list, &kvm->arch.assigned_dev_head); @@ -355,57 +565,48 @@ static void ack_flush(void *_completed) { } -void kvm_flush_remote_tlbs(struct kvm *kvm) +static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) { int i, cpu, me; - cpumask_t cpus; + cpumask_var_t cpus; + bool called = true; struct kvm_vcpu *vcpu; + if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) + cpumask_clear(cpus); + me = get_cpu(); - cpus_clear(cpus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; if (!vcpu) continue; - if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + if (test_and_set_bit(req, &vcpu->requests)) continue; cpu = vcpu->cpu; - if (cpu != -1 && cpu != me) - cpu_set(cpu, cpus); + if (cpus != NULL && cpu != -1 && cpu != me) + cpumask_set_cpu(cpu, cpus); } - if (cpus_empty(cpus)) - goto out; - ++kvm->stat.remote_tlb_flush; - smp_call_function_mask(cpus, ack_flush, NULL, 1); -out: + if (unlikely(cpus == NULL)) + smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1); + else if (!cpumask_empty(cpus)) + smp_call_function_many(cpus, ack_flush, NULL, 1); + else + called = false; put_cpu(); + free_cpumask_var(cpus); + return called; } -void kvm_reload_remote_mmus(struct kvm *kvm) +void kvm_flush_remote_tlbs(struct kvm *kvm) { - int i, cpu, me; - cpumask_t cpus; - struct kvm_vcpu *vcpu; - - me = get_cpu(); - cpus_clear(cpus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; - if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) - continue; - cpu = vcpu->cpu; - if (cpu != -1 && cpu != me) - cpu_set(cpu, cpus); - } - if (cpus_empty(cpus)) - goto out; - smp_call_function_mask(cpus, ack_flush, NULL, 1); -out: - put_cpu(); + if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) + ++kvm->stat.remote_tlb_flush; } +void kvm_reload_remote_mmus(struct kvm *kvm) +{ + make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); +} int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { @@ -710,6 +911,8 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; + if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1))) + goto out; if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) @@ -821,7 +1024,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(&old, npages ? &new : NULL); + /* Slot deletion case: we have to update the current slot */ + if (!npages) + *memslot = old; #ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); @@ -918,7 +1124,7 @@ int kvm_is_error_hva(unsigned long addr) } EXPORT_SYMBOL_GPL(kvm_is_error_hva); -static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) +struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) { int i; @@ -931,11 +1137,12 @@ static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn) } return NULL; } +EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { gfn = unalias_gfn(kvm, gfn); - return __gfn_to_memslot(kvm, gfn); + return gfn_to_memslot_unaliased(kvm, gfn); } int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) @@ -959,7 +1166,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *slot; gfn = unalias_gfn(kvm, gfn); - slot = __gfn_to_memslot(kvm, gfn); + slot = gfn_to_memslot_unaliased(kvm, gfn); if (!slot) return bad_hva(); return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); @@ -1210,7 +1417,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) struct kvm_memory_slot *memslot; gfn = unalias_gfn(kvm, gfn); - memslot = __gfn_to_memslot(kvm, gfn); + memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; @@ -1295,7 +1502,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_vcpu_fops = { +static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, @@ -1689,7 +1896,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static const struct file_operations kvm_vm_fops = { +static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .compat_ioctl = kvm_vm_ioctl, @@ -1711,6 +1918,18 @@ static int kvm_dev_ioctl_create_vm(void) return fd; } +static long kvm_dev_ioctl_check_extension_generic(long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + return 1; + default: + break; + } + return kvm_dev_ioctl_check_extension(arg); +} + static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1730,7 +1949,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension(arg); + r = kvm_dev_ioctl_check_extension_generic(arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; @@ -1771,9 +1990,9 @@ static void hardware_enable(void *junk) { int cpu = raw_smp_processor_id(); - if (cpu_isset(cpu, cpus_hardware_enabled)) + if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; - cpu_set(cpu, cpus_hardware_enabled); + cpumask_set_cpu(cpu, cpus_hardware_enabled); kvm_arch_hardware_enable(NULL); } @@ -1781,9 +2000,9 @@ static void hardware_disable(void *junk) { int cpu = raw_smp_processor_id(); - if (!cpu_isset(cpu, cpus_hardware_enabled)) + if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; - cpu_clear(cpu, cpus_hardware_enabled); + cpumask_clear_cpu(cpu, cpus_hardware_enabled); kvm_arch_hardware_disable(NULL); } @@ -2017,9 +2236,14 @@ int kvm_init(void *opaque, unsigned int vcpu_size, bad_pfn = page_to_pfn(bad_page); + if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { + r = -ENOMEM; + goto out_free_0; + } + r = kvm_arch_hardware_setup(); if (r < 0) - goto out_free_0; + goto out_free_0a; for_each_online_cpu(cpu) { smp_call_function_single(cpu, @@ -2053,6 +2277,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, } kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; r = misc_register(&kvm_dev); if (r) { @@ -2062,6 +2288,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; +#ifndef CONFIG_X86 + msi2intx = 0; +#endif return 0; @@ -2078,6 +2307,8 @@ out_free_2: on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); +out_free_0a: + free_cpumask_var(cpus_hardware_enabled); out_free_0: __free_page(bad_page); out: @@ -2101,6 +2332,7 @@ void kvm_exit(void) kvm_arch_hardware_unsetup(); kvm_arch_exit(); kvm_exit_debug(); + free_cpumask_var(cpus_hardware_enabled); __free_page(bad_page); } EXPORT_SYMBOL_GPL(kvm_exit); diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 41dcc845f78c..f59874446440 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -252,6 +252,7 @@ void kvm_trace_cleanup(void) struct kvm_trace_probe *p = &kvm_trace_probes[i]; marker_probe_unregister(p->name, p->probe_func, p); } + marker_synchronize_unregister(); relay_close(kt->rchan); debugfs_remove(kt->lost_file); |