![]() | Passant Elagroudy; Mohamed Khamis; Florian Mathis; Diana Irmscher; Andreas Bulling; Albrecht Schmidt Can Privacy-Aware Lifelogs Alter Our Memories? Inproceedings Forthcoming Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), Forthcoming. @inproceedings{elagroudy2019_chi, title = {Can Privacy-Aware Lifelogs Alter Our Memories?}, author = {Passant Elagroudy and Mohamed Khamis and Florian Mathis and Diana Irmscher and Andreas Bulling and Albrecht Schmidt}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2019/03/elagroudy2019_chi.pdf}, doi = {10.1145/3290607.3313052}, year = {2019}, date = {2019-03-12}, booktitle = {Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, abstract = {The abundance of automatically-triggered lifelogging cameras is a privacy threat to bystanders. Countering this by deleting photos limits relevant memory cues and the informative content of lifelogs. An alternative is to obfuscate bystanders, but it is not clear how this impacts the lifelogger’s recall of memories. We report on a study in which we compare viewing 1) unaltered photos, 2) photos with blurred people, and 3) a subset of the photos after deleting private ones, on memory recall. Findings show that obfuscated content helps users recall a lot of content, but it also results in recalling less accurate details, which can sometimes mislead the user. Our work informs the design of privacy-aware lifelogging systems that maximizes recall and steers discussion about ubiquitous technologies that could alter human memories.}, keywords = {}, pubstate = {forthcoming}, tppubtype = {inproceedings} } The abundance of automatically-triggered lifelogging cameras is a privacy threat to bystanders. Countering this by deleting photos limits relevant memory cues and the informative content of lifelogs. An alternative is to obfuscate bystanders, but it is not clear how this impacts the lifelogger’s recall of memories. We report on a study in which we compare viewing 1) unaltered photos, 2) photos with blurred people, and 3) a subset of the photos after deleting private ones, on memory recall. Findings show that obfuscated content helps users recall a lot of content, but it also results in recalling less accurate details, which can sometimes mislead the user. Our work informs the design of privacy-aware lifelogging systems that maximizes recall and steers discussion about ubiquitous technologies that could alter human memories. |
![]() | Julian Steil; Marion Koelle; Wilko Heuten; Susanne Boll; Andreas Bulling PrivacEye: Privacy-Preserving Head-Mounted Eye Tracking Using Egocentric Scene Image and Eye Movement Features Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019, (best video award). @inproceedings{steil19_etra, title = {PrivacEye: Privacy-Preserving Head-Mounted Eye Tracking Using Egocentric Scene Image and Eye Movement Features}, author = {Julian Steil and Marion Koelle and Wilko Heuten and Susanne Boll and Andreas Bulling}, url = {//perceptual.mpi-inf.mpg.de/files/2019/04/steil19_etra.pdf //perceptual.mpi-inf.mpg.de/files/2019/04/steil19_etra_supplementary_material.pdf}, doi = {10.1145/3314111.3319913}, year = {2019}, date = {2019-03-07}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, abstract = {Eyewear devices, such as augmented reality displays, increasingly integrate eye tracking, but the first-person camera required to map a user’s gaze to the visual scene can pose a significant threat to user and bystander privacy. We present PrivacEye, a method to detect privacy-sensitive everyday situations and automatically enable and disable the eye tracker’s first-person camera using a mechanical shutter. To close the shutter in privacy-sensitive situations, the method uses a deep representation of the first-person video combined with rich features that encode users’ eye movements. To open the shutter without visual input, PrivacEye detects changes in users’ eye movements alone to gauge changes in the “privacy level” of the current situation. We evaluate our method on a first-person video dataset recorded in daily life situations of 17 participants, annotated by themselves for privacy sensitivity, and show that our method is effective in preserving privacy in this challenging setting.}, note = {best video award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Eyewear devices, such as augmented reality displays, increasingly integrate eye tracking, but the first-person camera required to map a user’s gaze to the visual scene can pose a significant threat to user and bystander privacy. We present PrivacEye, a method to detect privacy-sensitive everyday situations and automatically enable and disable the eye tracker’s first-person camera using a mechanical shutter. To close the shutter in privacy-sensitive situations, the method uses a deep representation of the first-person video combined with rich features that encode users’ eye movements. To open the shutter without visual input, PrivacEye detects changes in users’ eye movements alone to gauge changes in the “privacy level” of the current situation. We evaluate our method on a first-person video dataset recorded in daily life situations of 17 participants, annotated by themselves for privacy sensitivity, and show that our method is effective in preserving privacy in this challenging setting. |
![]() | Julian Steil; Inken Hagestedt; Michael Xuelin Huang; Andreas Bulling Privacy-Aware Eye Tracking Using Differential Privacy Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019, (best paper award). @inproceedings{steil19_etra2, title = {Privacy-Aware Eye Tracking Using Differential Privacy}, author = {Julian Steil and Inken Hagestedt and Michael Xuelin Huang and Andreas Bulling}, url = {//perceptual.mpi-inf.mpg.de/files/2019/04/steil19_etra2.pdf //perceptual.mpi-inf.mpg.de/files/2019/04/steil19_etra2_supplementary_material.pdf}, doi = {10.1145/3314111.3319915}, year = {2019}, date = {2019-03-07}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, abstract = {With eye tracking being increasingly integrated into virtual and augmented reality (VR/AR) head-mounted displays, preserving users’ privacy is an ever more important, yet under-explored, topic in the eye tracking community. We report a large-scale online survey (N=124) on privacy aspects of eye tracking that provides the first comprehensive account of with whom, for which services, and to what extent users are willing to share their gaze data. Using these insights, we design a privacy-aware VR interface that uses differential privacy, which we evaluate on a new 20-participant dataset for two privacy sensitive tasks: We show that our method can prevent user re-identification and protect gender information while maintaining high performance for gaze-based document type classification. Our results highlight the privacy challenges particular to gaze data and demonstrate that differential privacy is a potential means to address them. Thus, this paper lays important foundations for future research on privacy-aware gaze interfaces.}, note = {best paper award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } With eye tracking being increasingly integrated into virtual and augmented reality (VR/AR) head-mounted displays, preserving users’ privacy is an ever more important, yet under-explored, topic in the eye tracking community. We report a large-scale online survey (N=124) on privacy aspects of eye tracking that provides the first comprehensive account of with whom, for which services, and to what extent users are willing to share their gaze data. Using these insights, we design a privacy-aware VR interface that uses differential privacy, which we evaluate on a new 20-participant dataset for two privacy sensitive tasks: We show that our method can prevent user re-identification and protect gender information while maintaining high performance for gaze-based document type classification. Our results highlight the privacy challenges particular to gaze data and demonstrate that differential privacy is a potential means to address them. Thus, this paper lays important foundations for future research on privacy-aware gaze interfaces. |
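As a rough illustration of how differential privacy can be applied to gaze data as in the entry above (the paper's actual mechanism, features, and parameters may differ), the sketch below adds Laplace noise, scaled by an assumed sensitivity and privacy budget ε, to aggregated gaze features before they are shared. The feature names, clipping bounds, sample count, and ε = 1.0 are hypothetical.

```python
import numpy as np

def laplace_mechanism(value, sensitivity, epsilon, rng=None):
    """Release a noisy version of `value` satisfying epsilon-differential privacy
    for a query with the given L1 sensitivity (standard Laplace mechanism)."""
    rng = rng or np.random.default_rng()
    scale = sensitivity / epsilon
    return value + rng.laplace(loc=0.0, scale=scale, size=np.shape(value))

# Hypothetical aggregate gaze features for one recording window, e.g. mean
# fixation duration (ms) and saccade rate (1/s). The ranges below are assumed
# clipping bounds, needed to bound the sensitivity of the mean.
features = np.array([312.0, 2.4])
lower, upper = np.array([50.0, 0.0]), np.array([1000.0, 6.0])
n_samples = 500  # number of raw gaze events aggregated into the window

clipped = np.clip(features, lower, upper)
sensitivity = (upper - lower) / n_samples   # sensitivity of a clipped mean
noisy_features = laplace_mechanism(clipped, sensitivity, epsilon=1.0)
print(noisy_features)  # share these instead of the raw aggregates
```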
![]() | Philipp Müller; Daniel Buschek; Michael Xuelin Huang; Andreas Bulling Reducing Calibration Drift in Mobile Eye Trackers by Exploiting Mobile Phone Usage Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019. @inproceedings{mueller19_etra, title = {Reducing Calibration Drift in Mobile Eye Trackers by Exploiting Mobile Phone Usage}, author = {Philipp Müller and Daniel Buschek and Michael Xuelin Huang and Andreas Bulling}, url = {//perceptual.mpi-inf.mpg.de/files/2019/04/mueller19_etra.pdf}, doi = {10.1145/3314111.3319918}, year = {2019}, date = {2019-03-07}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, abstract = {Automatic saliency-based recalibration is promising for addressing calibration drift in mobile eye trackers but existing bottom-up saliency methods neglect user’s goal-directed visual attention in natural behaviour. By inspecting real-life recordings of egocentric eye tracker cameras, we reveal that users are likely to look at their phones once these appear in view. We propose two novel automatic recalibration methods that exploit mobile phone usage: The first builds saliency maps using the phone location in the egocentric view to identify likely gaze locations. The second uses the occurrence of touch events to recalibrate the eye tracker, thereby enabling privacy-preserving recalibration. Through in-depth evaluations on a recent mobile eye tracking dataset (N=17, 65 hours) we show that our approaches outperform a state-of-the-art saliency approach for automatic recalibration. As such, our approach improves mobile eye tracking and gaze-based interaction, particularly for long-term use.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Automatic saliency-based recalibration is promising for addressing calibration drift in mobile eye trackers but existing bottom-up saliency methods neglect user’s goal-directed visual attention in natural behaviour. By inspecting real-life recordings of egocentric eye tracker cameras, we reveal that users are likely to look at their phones once these appear in view. We propose two novel automatic recalibration methods that exploit mobile phone usage: The first builds saliency maps using the phone location in the egocentric view to identify likely gaze locations. The second uses the occurrence of touch events to recalibrate the eye tracker, thereby enabling privacy-preserving recalibration. Through in-depth evaluations on a recent mobile eye tracking dataset (N=17, 65 hours) we show that our approaches outperform a state-of-the-art saliency approach for automatic recalibration. As such, our approach improves mobile eye tracking and gaze-based interaction, particularly for long-term use. |
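The entry above describes recalibrating a mobile eye tracker from touch events. A minimal sketch of that general idea, not the paper's exact method: treat touch locations as proxy ground truth for gaze at touch time and fit a least-squares affine correction to compensate drift. All coordinates and sample values below are made up for illustration.

```python
import numpy as np

def fit_affine_correction(gaze_xy, touch_xy):
    """Least-squares 2D affine map from drifted gaze estimates to touch
    locations (used here as proxy ground truth for where the user looked)."""
    G = np.hstack([gaze_xy, np.ones((len(gaze_xy), 1))])  # (N, 3)
    A, *_ = np.linalg.lstsq(G, touch_xy, rcond=None)      # (3, 2)
    return A

def apply_correction(A, gaze_xy):
    G = np.hstack([gaze_xy, np.ones((len(gaze_xy), 1))])
    return G @ A

# Hypothetical calibration pairs gathered during phone use: estimated gaze
# and the touch event position, both in the same (e.g. screen) coordinates.
gaze_at_touch = np.array([[0.21, 0.34], [0.55, 0.61], [0.80, 0.25], [0.40, 0.82]])
touch_points  = np.array([[0.25, 0.30], [0.58, 0.57], [0.83, 0.21], [0.44, 0.78]])

A = fit_affine_correction(gaze_at_touch, touch_points)
corrected = apply_correction(A, np.array([[0.50, 0.50]]))
print(corrected)
```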
Kai Dierkes; Moritz Kassner; Andreas Bulling A fast approach to refraction-aware 3D eye-model fitting and gaze prediction Inproceedings Forthcoming Proc. International Symposium on Eye Tracking Research and Applications (ETRA), Forthcoming. @inproceedings{dierkes19_etra, title = {A fast approach to refraction-aware 3D eye-model fitting and gaze prediction}, author = {Kai Dierkes and Moritz Kassner and Andreas Bulling}, year = {2019}, date = {2019-03-07}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, keywords = {}, pubstate = {forthcoming}, tppubtype = {inproceedings} } | |
![]() | Xucong Zhang; Yusuke Sugano; Andreas Bulling Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019. @inproceedings{zhang19_chi, title = {Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications}, author = {Xucong Zhang and Yusuke Sugano and Andreas Bulling}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2019/01/zhang19_chi.pdf https://github.molgen.mpg.de/perceptual/opengaze}, doi = {10.1145/3290605.3300646}, year = {2019}, date = {2019-01-01}, booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, abstract = {Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved but they are still not yet widely used in the human-computer interaction (HCI) community. This is partly because it remains unclear how they perform compared to model-based approaches as well as dominant, special-purpose eye tracking equipment. To address this limitation, we evaluate the performance of state-of-the-art appearance-based gaze estimation for interaction scenarios with and without personal calibration, indoors and outdoors, for different sensing distances, as well as for users with and without glasses. We discuss the obtained findings and their implications for the most important gaze-based applications, namely explicit eye input, attentive user interfaces, gaze-based user modelling, and passive eye monitoring. To democratise the use of appearance-based gaze estimation and interaction in HCI, we finally present OpenGaze (www.opengaze.org), the first software toolkit for appearance-based gaze estimation and interaction.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Appearance-based gaze estimation methods that only require an off-the-shelf camera have significantly improved but they are still not yet widely used in the human-computer interaction (HCI) community. This is partly because it remains unclear how they perform compared to model-based approaches as well as dominant, special-purpose eye tracking equipment. To address this limitation, we evaluate the performance of state-of-the-art appearance-based gaze estimation for interaction scenarios with and without personal calibration, indoors and outdoors, for different sensing distances, as well as for users with and without glasses. We discuss the obtained findings and their implications for the most important gaze-based applications, namely explicit eye input, attentive user interfaces, gaze-based user modelling, and passive eye monitoring. To democratise the use of appearance-based gaze estimation and interaction in HCI, we finally present OpenGaze (www.opengaze.org), the first software toolkit for appearance-based gaze estimation and interaction. |
![]() | Teresa Hirzle; Jan Gugenheimer; Florian Geiselhart; Andreas Bulling; Enrico Rukzio A Design Space for Gaze Interaction on Head-mounted Displays Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019. @inproceedings{hirzle19_chi, title = {A Design Space for Gaze Interaction on Head-mounted Displays}, author = {Teresa Hirzle and Jan Gugenheimer and Florian Geiselhart and Andreas Bulling and Enrico Rukzio}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2019/01/hirzle19_chi.pdf}, doi = {10.1145/3290605.3300855}, year = {2019}, date = {2019-01-01}, booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, abstract = {Augmented and virtual reality (AR/VR) has entered the mass market and, with it, eye tracking will soon become a core technology for next generation head-mounted displays (HMDs). In contrast to existing gaze interfaces, the 3D nature of AR and VR requires estimating a user’s gaze in 3D. While first applications, such as foveated rendering, hint at the compelling potential of combining HMDs and gaze, a systematic analysis is missing. To fill this gap, we present the first design space for gaze interaction on HMDs. Our design space covers human depth perception and technical requirements in two dimensions aiming to identify challenges and opportunities for interaction design. As such, our design space provides a comprehensive overview and serves as an important guideline for researchers and practitioners working on gaze interaction on HMDs. We further demonstrate how our design space is used in practice by presenting two interactive applications: EyeHealth and XRay-Vision.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Augmented and virtual reality (AR/VR) has entered the mass market and, with it, eye tracking will soon become a core technology for next generation head-mounted displays (HMDs). In contrast to existing gaze interfaces, the 3D nature of AR and VR requires estimating a user’s gaze in 3D. While first applications, such as foveated rendering, hint at the compelling potential of combining HMDs and gaze, a systematic analysis is missing. To fill this gap, we present the first design space for gaze interaction on HMDs. Our design space covers human depth perception and technical requirements in two dimensions aiming to identify challenges and opportunities for interaction design. As such, our design space provides a comprehensive overview and serves as an important guideline for researchers and practitioners working on gaze interaction on HMDs. We further demonstrate how our design space is used in practice by presenting two interactive applications: EyeHealth and XRay-Vision. |
![]() | Teresa Hirzle; Jan Gugenheimer; Florian Geiselhart; Andreas Bulling; Enrico Rukzio Towards a Symbiotic Human-Machine Depth Sensor: Exploring 3D Gaze for Object Reconstruction Inproceedings Adj. Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 114-116, 2018. @inproceedings{hirzle18_uist, title = {Towards a Symbiotic Human-Machine Depth Sensor: Exploring 3D Gaze for Object Reconstruction}, author = {Teresa Hirzle and Jan Gugenheimer and Florian Geiselhart and Andreas Bulling and Enrico Rukzio}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2018/12/hirzle18_uist.pdf}, doi = {10.1145/3266037.3266119}, year = {2018}, date = {2018-08-02}, booktitle = {Adj. Proc. of the ACM Symposium on User Interface Software and Technology (UIST)}, pages = {114-116}, abstract = {Eye tracking is expected to become an integral part of future augmented reality (AR) head-mounted displays (HMDs) given that it can easily be integrated into existing hardware and provides a versatile interaction modality. To augment objects in the real world, AR HMDs require a three-dimensional understanding of the scene, which is currently solved using depth cameras. In this work we aim to explore how 3D gaze data can be used to enhance scene understanding for AR HMDs by envisioning a symbiotic human-machine depth camera, fusing depth data with 3D gaze information. We present a first proof of concept, exploring to what extent we are able to recognise what a user is looking at by plotting 3D gaze data. To measure 3D gaze, we implemented a vergence-based algorithm and built an eye tracking setup consisting of a Pupil Labs headset and an OptiTrack motion capture system, allowing us to measure 3D gaze inside a 50x50x50 cm volume. We show first 3D gaze plots of "gazed-at" objects and describe our vision of a symbiotic human-machine depth camera that combines a depth camera and human 3D gaze information.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Eye tracking is expected to become an integral part of future augmented reality (AR) head-mounted displays (HMDs) given that it can easily be integrated into existing hardware and provides a versatile interaction modality. To augment objects in the real world, AR HMDs require a three-dimensional understanding of the scene, which is currently solved using depth cameras. In this work we aim to explore how 3D gaze data can be used to enhance scene understanding for AR HMDs by envisioning a symbiotic human-machine depth camera, fusing depth data with 3D gaze information. We present a first proof of concept, exploring to what extent we are able to recognise what a user is looking at by plotting 3D gaze data. To measure 3D gaze, we implemented a vergence-based algorithm and built an eye tracking setup consisting of a Pupil Labs headset and an OptiTrack motion capture system, allowing us to measure 3D gaze inside a 50x50x50 cm volume. We show first 3D gaze plots of "gazed-at" objects and describe our vision of a symbiotic human-machine depth camera that combines a depth camera and human 3D gaze information. |
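The vergence-based algorithm mentioned in the entry above needs to turn two gaze rays (one per eye) into a single 3D gaze point. A common way to do this, shown here as a sketch rather than the authors' exact implementation, is to take the midpoint of the shortest segment between the two rays; the eyeball positions and example target are hypothetical.

```python
import numpy as np

def vergence_point(o_l, d_l, o_r, d_r):
    """Midpoint of the shortest segment between the left and right gaze rays
    (origins o_*, directions d_*), a common estimate of the 3D gaze point."""
    d_l, d_r = d_l / np.linalg.norm(d_l), d_r / np.linalg.norm(d_r)
    w0 = o_l - o_r
    a, b, c = d_l @ d_l, d_l @ d_r, d_r @ d_r
    d, e = d_l @ w0, d_r @ w0
    denom = a * c - b * b            # ~0 if the rays are (near) parallel
    s = (b * e - c * d) / denom      # parameter along the left ray
    t = (a * e - b * d) / denom      # parameter along the right ray
    return 0.5 * ((o_l + s * d_l) + (o_r + t * d_r))

# Hypothetical eyeball centres 6.4 cm apart, both looking at a point ~40 cm ahead.
left_origin, right_origin = np.array([-3.2, 0.0, 0.0]), np.array([3.2, 0.0, 0.0])
target = np.array([5.0, 2.0, 40.0])
p = vergence_point(left_origin, target - left_origin,
                   right_origin, target - right_origin)
print(p)  # ~ [5., 2., 40.]
```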
![]() | Mohamed Khamis; Anna Kienle; Florian Alt; Andreas Bulling GazeDrone: Mobile Eye-Based Interaction in Public Space Without Augmenting the User Inproceedings Proc. of the ACM Workshop on Micro Aerial Vehicle Networks, Systems, and Applications (DroNet), pp. 66-71, 2018. @inproceedings{khamis18_dronet, title = {GazeDrone: Mobile Eye-Based Interaction in Public Space Without Augmenting the User}, author = {Mohamed Khamis and Anna Kienle and Florian Alt and Andreas Bulling}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2018/06/khamis18_dronet.pdf}, doi = {10.1145/3213526.3213539}, year = {2018}, date = {2018-05-29}, booktitle = {Proc. of the ACM Workshop on Micro Aerial Vehicle Networks, Systems, and Applications (DroNet)}, pages = {66-71}, abstract = {Gaze interaction holds a lot of promise for seamless human-computer interaction. At the same time, current wearable mobile eye trackers require user augmentation that negatively impacts natural user behavior while remote trackers require users to position themselves within a confined tracking range. We present GazeDrone, the first system that combines a camera-equipped aerial drone with a computational method to detect sidelong glances for spontaneous (calibration-free) gaze-based interaction with surrounding pervasive systems (e.g., public displays). GazeDrone does not require augmenting each user with on-body sensors and allows interaction from arbitrary positions, even while moving. We demonstrate that drone-supported gaze interaction is feasible and accurate for certain movement types. It is well-perceived by users, in particular while interacting from a fixed position as well as while moving orthogonally or diagonally to a display. We present design implications and discuss opportunities and challenges for drone-supported gaze interaction in public.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze interaction holds a lot of promise for seamless human-computer interaction. At the same time, current wearable mobile eye trackers require user augmentation that negatively impacts natural user behavior while remote trackers require users to position themselves within a confined tracking range. We present GazeDrone, the first system that combines a camera-equipped aerial drone with a computational method to detect sidelong glances for spontaneous (calibration-free) gaze-based interaction with surrounding pervasive systems (e.g., public displays). GazeDrone does not require augmenting each user with on-body sensors and allows interaction from arbitrary positions, even while moving. We demonstrate that drone-supported gaze interaction is feasible and accurate for certain movement types. It is well-perceived by users, in particular while interacting from a fixed position as well as while moving orthogonally or diagonally to a display. We present design implications and discuss opportunities and challenges for drone-supported gaze interaction in public. |
![]() | Julian Steil; Philipp Müller; Yusuke Sugano; Andreas Bulling Forecasting User Attention During Everyday Mobile Interactions Using Device-Integrated and Wearable Sensors Inproceedings Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI), pp. 1:1–1:13, 2018, (best paper award). @inproceedings{steil18_mobilehci, title = {Forecasting User Attention During Everyday Mobile Interactions Using Device-Integrated and Wearable Sensors}, author = {Julian Steil and Philipp Müller and Yusuke Sugano and Andreas Bulling}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2018/07/steil18_mobilehci.pdf}, doi = {10.1145/3229434.3229439}, year = {2018}, date = {2018-04-16}, booktitle = {Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI)}, pages = {1:1--1:13}, abstract = {Visual attention is highly fragmented during mobile interactions but the erratic nature of attention shifts currently limits attentive user interfaces to adapt after the fact, i.e. after shifts have already happened. We instead study attention forecasting – the challenging task of predicting users' gaze behavior (overt visual attention) in the near future. We present a novel long-term dataset of everyday mobile phone interactions, continuously recorded from 20 participants engaged in common activities on a university campus over 4.5 hours each (more than 90 hours in total). We propose a proof-of-concept method that uses device-integrated sensors and body-worn cameras to encode rich information on device usage and users' visual scene. We demonstrate that our method can forecast bidirectional attention shifts and whether the primary attentional focus is on the handheld mobile device. We study the impact of different feature sets on performance and discuss the significant potential but also remaining challenges of forecasting user attention during mobile interactions.}, note = {best paper award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Visual attention is highly fragmented during mobile interactions but the erratic nature of attention shifts currently limits attentive user interfaces to adapt after the fact, i.e. after shifts have already happened. We instead study attention forecasting – the challenging task of predicting users' gaze behavior (overt visual attention) in the near future. We present a novel long-term dataset of everyday mobile phone interactions, continuously recorded from 20 participants engaged in common activities on a university campus over 4.5 hours each (more than 90 hours in total). We propose a proof-of-concept method that uses device-integrated sensors and body-worn cameras to encode rich information on device usage and users' visual scene. We demonstrate that our method can forecast bidirectional attention shifts and whether the primary attentional focus is on the handheld mobile device. We study the impact of different feature sets on performance and discuss the significant potential but also remaining challenges of forecasting user attention during mobile interactions. |
![]() | Mohamed Khamis; Florian Alt; Andreas Bulling The Past, Present, and Future of Gaze-enabled Handheld Mobile Devices: Survey and Lessons Learned Inproceedings Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI), pp. 38:1–38:17, 2018, (best paper honourable mention award). @inproceedings{khamis18_mobilehci, title = {The Past, Present, and Future of Gaze-enabled Handheld Mobile Devices: Survey and Lessons Learned}, author = {Mohamed Khamis and Florian Alt and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/05/khamis18_mobilehci.pdf}, doi = {10.1145/3229434.3229452}, year = {2018}, date = {2018-04-16}, booktitle = {Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI)}, pages = {38:1--38:17}, abstract = {While first-generation mobile gaze interfaces required special-purpose hardware, recent advances in computational gaze estimation and the availability of sensor-rich and powerful devices is finally fulfilling the promise of pervasive eye tracking and eye-based interaction on off-the-shelf mobile devices. This work provides the first holistic view on the past, present, and future of eye tracking on handheld mobile devices. To this end, we discuss how research developed from building hardware prototypes, to accurate gaze estimation on unmodified smartphones and tablets. We then discuss implications by laying out 1) novel opportunities, including pervasive advertising and conducting in-the-wild eye tracking studies on handhelds, and 2) new challenges that require further research, such as visibility of the user’s eyes, lighting conditions, and privacy implications. We discuss how these developments shape MobileHCI research in the future, possibly the next 20 years.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } While first-generation mobile gaze interfaces required special-purpose hardware, recent advances in computational gaze estimation and the availability of sensor-rich and powerful devices is finally fulfilling the promise of pervasive eye tracking and eye-based interaction on off-the-shelf mobile devices. This work provides the first holistic view on the past, present, and future of eye tracking on handheld mobile devices. To this end, we discuss how research developed from building hardware prototypes, to accurate gaze estimation on unmodified smartphones and tablets. We then discuss implications by laying out 1) novel opportunities, including pervasive advertising and conducting in-the-wild eye tracking studies on handhelds, and 2) new challenges that require further research, such as visibility of the user’s eyes, lighting conditions, and privacy implications. We discuss how these developments shape MobileHCI research in the future, possibly the next 20 years. |
![]() | Julian Steil; Michael Xuelin Huang; Andreas Bulling Fixation Detection for Head-Mounted Eye Tracking Based on Visual Similarity of Gaze Targets Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 23:1-23:9, 2018. @inproceedings{steil18_etra, title = {Fixation Detection for Head-Mounted Eye Tracking Based on Visual Similarity of Gaze Targets}, author = {Julian Steil and Michael Xuelin Huang and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/steil18_etra.pdf https://perceptual.mpi-inf.mpg.de/research/datasets/#steil18_etra}, doi = {10.1145/3204493.3204538}, year = {2018}, date = {2018-03-28}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {23:1-23:9}, abstract = {Fixations are widely analysed in human vision, gaze-based interaction, and experimental psychology research. However, robust fixation detection in mobile settings is profoundly challenging given the prevalence of user and gaze target motion. These movements feign a shift in gaze estimates in the frame of reference defined by the eye tracker's scene camera. To address this challenge, we present a novel fixation detection method for head-mounted eye trackers. Our method exploits that, independent of user or gaze target motion, target appearance remains about the same during a fixation. It extracts image information from small regions around the current gaze position and analyses the appearance similarity of these gaze patches across video frames to detect fixations. We evaluate our method using fine-grained fixation annotations on a five-participant indoor dataset (MPIIEgoFixation) with more than 2,300 fixations in total. Our method outperforms commonly used velocity- and dispersion-based algorithms, which highlights its significant potential to analyse scene image information for eye movement detection.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Fixations are widely analysed in human vision, gaze-based interaction, and experimental psychology research. However, robust fixation detection in mobile settings is profoundly challenging given the prevalence of user and gaze target motion. These movements feign a shift in gaze estimates in the frame of reference defined by the eye tracker's scene camera. To address this challenge, we present a novel fixation detection method for head-mounted eye trackers. Our method exploits that, independent of user or gaze target motion, target appearance remains about the same during a fixation. It extracts image information from small regions around the current gaze position and analyses the appearance similarity of these gaze patches across video frames to detect fixations. We evaluate our method using fine-grained fixation annotations on a five-participant indoor dataset (MPIIEgoFixation) with more than 2,300 fixations in total. Our method outperforms commonly used velocity- and dispersion-based algorithms, which highlights its significant potential to analyse scene image information for eye movement detection. |
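A minimal sketch of the underlying idea in the entry above, appearance similarity of small gaze-centred patches across frames, using plain normalised cross-correlation; the paper's actual image representation and thresholds differ, and the patch size, similarity threshold, and minimum fixation length below are assumptions.

```python
import numpy as np

PATCH = 32           # patch half-size in pixels around the gaze estimate (assumed)
SIM_THRESHOLD = 0.8  # assumed similarity threshold for "same gaze target"

def gaze_patch(frame, gx, gy, r=PATCH):
    gx, gy = int(round(gx)), int(round(gy))
    h, w = frame.shape[:2]
    x0, x1 = max(gx - r, 0), min(gx + r, w)
    y0, y1 = max(gy - r, 0), min(gy + r, h)
    return frame[y0:y1, x0:x1].astype(np.float32)

def patch_similarity(a, b):
    """Normalised cross-correlation of two equally sized grayscale patches."""
    a, b = a - a.mean(), b - b.mean()
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    return float((a * b).sum() / denom) if denom > 0 else 0.0

def detect_fixations(frames, gaze):
    """Group consecutive frames whose gaze patches look alike into fixations.
    `frames`: list of grayscale images, `gaze`: list of (x, y) pixel positions."""
    fixations, start = [], 0
    for i in range(1, len(frames)):
        p_prev = gaze_patch(frames[i - 1], *gaze[i - 1])
        p_cur = gaze_patch(frames[i], *gaze[i])
        if p_prev.shape != p_cur.shape or patch_similarity(p_prev, p_cur) < SIM_THRESHOLD:
            if i - start >= 3:               # assumed minimum fixation length (frames)
                fixations.append((start, i - 1))
            start = i
    if len(frames) - start >= 3:
        fixations.append((start, len(frames) - 1))
    return fixations
```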
![]() | Michael Barz; Florian Daiber; Daniel Sonntag; Andreas Bulling Error-Aware Gaze-Based Interfaces for Robust Mobile Gaze Interaction Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 24:1-24:10, 2018, (best paper award). @inproceedings{barz18_etra, title = {Error-Aware Gaze-Based Interfaces for Robust Mobile Gaze Interaction}, author = {Michael Barz and Florian Daiber and Daniel Sonntag and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/barz18_etra.pdf}, doi = {10.1145/3204493.3204536}, year = {2018}, date = {2018-03-28}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {24:1-24:10}, abstract = {Gaze estimation error is unavoidable in head-mounted eye trackers and can severely hamper usability and performance of mobile gaze-based interfaces given that the error varies constantly for different interaction positions. In this work, we explore error-aware gaze-based interfaces that estimate and adapt to gaze estimation error on-the-fly. We implement a sample error-aware user interface for gaze-based selection and different error compensation methods: a naïve approach that increases component size directly proportional to the absolute error, a recent model by Feit et al. (CHI’17) that is based on the 2-dimensional error distribution, and a novel predictive model that shifts gaze by a directional error estimate. We evaluate these models in a 12-participant user study and show that our predictive model outperforms the others significantly in terms of selection rate, particularly for small gaze targets. These results underline both the feasibility and potential of next generation error-aware gaze-based user interfaces.}, note = {best paper award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze estimation error is unavoidable in head-mounted eye trackers and can severely hamper usability and performance of mobile gaze-based interfaces given that the error varies constantly for different interaction positions. In this work, we explore error-aware gaze-based interfaces that estimate and adapt to gaze estimation error on-the-fly. We implement a sample error-aware user interface for gaze-based selection and different error compensation methods: a naïve approach that increases component size directly proportional to the absolute error, a recent model by Feit et al. (CHI’17) that is based on the 2-dimensional error distribution, and a novel predictive model that shifts gaze by a directional error estimate. We evaluate these models in a 12-participant user study and show that our predictive model outperforms the others significantly in terms of selection rate, particularly for small gaze targets. These results underline both the feasibility and potential of next generation error-aware gaze-based user interfaces. |
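To illustrate the third compensation strategy described in the entry above (shifting gaze by a directional error estimate before hit-testing), here is a small sketch; the calibration samples, target layout, and selection radius are hypothetical and not taken from the paper.

```python
import numpy as np

def directional_error(calib_gaze, calib_truth):
    """Mean 2D offset between estimated gaze and known target positions,
    collected e.g. during a brief calibration or from prior interactions."""
    return np.mean(np.asarray(calib_truth) - np.asarray(calib_gaze), axis=0)

def select_target(gaze_xy, targets, error_offset, radius=0.05):
    """Hit-test the error-corrected gaze point against circular targets.
    `targets` maps a name to its centre; `radius` is an assumed target size."""
    corrected = np.asarray(gaze_xy) + error_offset
    for name, centre in targets.items():
        if np.linalg.norm(corrected - np.asarray(centre)) <= radius:
            return name, corrected
    return None, corrected

# Hypothetical calibration samples (normalised display coordinates).
est = [(0.48, 0.52), (0.27, 0.33), (0.73, 0.69)]
true = [(0.50, 0.50), (0.30, 0.30), (0.75, 0.67)]
offset = directional_error(est, true)

targets = {"play": (0.30, 0.30), "stop": (0.70, 0.70)}
print(select_target((0.28, 0.33), targets, offset))  # selects "play"
```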
![]() | Xucong Zhang; Yusuke Sugano; Andreas Bulling Revisiting Data Normalization for Appearance-Based Gaze Estimation Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 12:1-12:9, 2018. @inproceedings{zhang18_etra, title = {Revisiting Data Normalization for Appearance-Based Gaze Estimation}, author = {Xucong Zhang and Yusuke Sugano and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/zhang18_etra.pdf https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/gaze-based-human-computer-interaction/revisiting-data-normalization-for-appearance-based-gaze-estimation/}, doi = {10.1145/3204493.3204548}, year = {2018}, date = {2018-03-28}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {12:1-12:9}, abstract = {Appearance-based gaze estimation is promising for unconstrained real-world settings, but the significant variability in head pose and user-camera distance poses significant challenges for training generic gaze estimators. Data normalization was proposed to cancel out this geometric variability by mapping input images and gaze labels to a normalized space. Although used successfully in prior works, the role and importance of data normalization remains unclear. To fill this gap, we study data normalization for the first time using principled evaluations on both simulated and real data. We propose a modification to the current data normalization formulation by removing the scaling factor and show that our new formulation performs significantly better (between 9.5% and 32.7%) in the different evaluation settings. Using images synthesized from a 3D face model, we demonstrate the benefit of data normalization for the efficiency of the model training. Experiments on real-world images confirm the advantages of data normalization in terms of gaze estimation performance.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Appearance-based gaze estimation is promising for unconstrained real-world settings, but the significant variability in head pose and user-camera distance poses significant challenges for training generic gaze estimators. Data normalization was proposed to cancel out this geometric variability by mapping input images and gaze labels to a normalized space. Although used successfully in prior works, the role and importance of data normalization remains unclear. To fill this gap, we study data normalization for the first time using principled evaluations on both simulated and real data. We propose a modification to the current data normalization formulation by removing the scaling factor and show that our new formulation performs significantly better (between 9.5% and 32.7%) in the different evaluation settings. Using images synthesized from a 3D face model, we demonstrate the benefit of data normalization for the efficiency of the model training. Experiments on real-world images confirm the advantages of data normalization in terms of gaze estimation performance. |
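The sketch below illustrates one common construction of the normalising rotation and the rotation-only transform of the gaze label (i.e. without the scaling factor) that the entry above argues for; the axis conventions and example values are assumptions and may differ from the paper's exact formulation.

```python
import numpy as np

def normalization_rotation(face_center, head_rotation):
    """Rotation mapping camera coordinates into a 'normalized' space whose
    z-axis points from the camera to the face centre and whose x-axis keeps the
    head's x-axis horizontal. One common construction; conventions may vary."""
    forward = face_center / np.linalg.norm(face_center)
    hRx = head_rotation[:, 0]                       # head x-axis in camera coords
    down = np.cross(forward, hRx); down /= np.linalg.norm(down)
    right = np.cross(down, forward); right /= np.linalg.norm(right)
    return np.stack([right, down, forward])         # rows: x_n, y_n, z_n

# With the modified formulation, gaze and head-pose *labels* are transformed by
# the rotation alone (no scaling); scaling only enters the image warp.
face_center = np.array([30.0, 20.0, 600.0])          # mm, in camera coordinates (toy values)
head_rotation = np.eye(3)                             # toy head pose
gaze_vec = np.array([0.1, -0.2, -0.97])

R = normalization_rotation(face_center, head_rotation)
gaze_normalized = R @ gaze_vec
print(gaze_normalized / np.linalg.norm(gaze_normalized))
```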
![]() | Kai Dierkes; Moritz Kassner; Andreas Bulling A novel approach to single camera, glint-free 3D eye model fitting including corneal refraction Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 9:1-9:9, 2018. @inproceedings{dierkes18_etra, title = {A novel approach to single camera, glint-free 3D eye model fitting including corneal refraction}, author = {Kai Dierkes and Moritz Kassner and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/dierkes18_etra.pdf}, doi = {10.1145/3204493.3204525}, year = {2018}, date = {2018-03-28}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {9:1-9:9}, abstract = {Model-based methods for glint-free gaze estimation typically infer eye pose using pupil contours extracted from eye images. Existing methods, however, either ignore or require complex hardware setups to deal with refraction effects occurring at the corneal interfaces. In this work we provide a detailed analysis of the effects of refraction in glint-free gaze estimation using a single near-eye camera, based on the method presented by [Swirski et al. 2013]. We demonstrate systematic deviations in inferred eyeball positions and gaze directions with respect to synthetic ground-truth data and show that ignoring corneal refraction can result in angular errors of several degrees. Furthermore, we quantify gaze direction dependent errors in pupil radius estimates. We propose a novel approach to account for corneal refraction in 3D eye model fitting and by analyzing synthetic and real images show that our new method successfully captures refraction effects and helps to overcome the shortcomings of the state of the art approach.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Model-based methods for glint-free gaze estimation typically infer eye pose using pupil contours extracted from eye images. Existing methods, however, either ignore or require complex hardware setups to deal with refraction effects occurring at the corneal interfaces. In this work we provide a detailed analysis of the effects of refraction in glint-free gaze estimation using a single near-eye camera, based on the method presented by [Swirski et al. 2013]. We demonstrate systematic deviations in inferred eyeball positions and gaze directions with respect to synthetic ground-truth data and show that ignoring corneal refraction can result in angular errors of several degrees. Furthermore, we quantify gaze direction dependent errors in pupil radius estimates. We propose a novel approach to account for corneal refraction in 3D eye model fitting and by analyzing synthetic and real images show that our new method successfully captures refraction effects and helps to overcome the shortcomings of the state of the art approach. |
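A refraction-aware eye model, as described in the entry above, has to bend camera rays at the corneal surface instead of treating them as straight lines. The sketch below shows only that ingredient, Snell's law in vector form, not the paper's eye-model fitting procedure; the corneal refractive index of 1.3375 is a commonly used approximation and the example ray is hypothetical.

```python
import numpy as np

def refract(d, n, n1=1.0, n2=1.3375):
    """Refract direction `d` at a surface with normal `n` (pointing against the
    incoming ray) using Snell's law in vector form. n1, n2 are refractive
    indices; returns None for total internal reflection."""
    d, n = d / np.linalg.norm(d), n / np.linalg.norm(n)
    eta = n1 / n2
    cos_i = -d @ n
    k = 1.0 - eta**2 * (1.0 - cos_i**2)
    if k < 0.0:
        return None
    return eta * d + (eta * cos_i - np.sqrt(k)) * n

# A camera ray hitting the corneal surface: the refracted ray, not the straight
# one, is what actually intersects the pupil contour inside the eye.
incoming = np.array([0.0, 0.2, 1.0])
surface_normal = np.array([0.0, 0.0, -1.0])   # pointing back towards the camera
print(refract(incoming, surface_normal))
```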
![]() | Philipp Müller; Michael Xuelin Huang; Xucong Zhang; Andreas Bulling Robust Eye Contact Detection in Natural Multi-Person Interactions Using Gaze and Speaking Behaviour Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 31:1-31:10, 2018. @inproceedings{mueller18_etra, title = {Robust Eye Contact Detection in Natural Multi-Person Interactions Using Gaze and Speaking Behaviour}, author = {Philipp Müller and Michael Xuelin Huang and Xucong Zhang and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/mueller18_etra.pdf}, doi = {10.1145/3204493.3204549}, year = {2018}, date = {2018-03-28}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {31:1-31:10}, abstract = {Eye contact is one of the most important non-verbal social cues and fundamental to human interactions. However, detecting eye contact without specialized eye tracking equipment poses significant challenges, particularly for multiple people in real-world settings. We present a novel method to robustly detect eye contact in natural three- and four-person interactions using off-the-shelf ambient cameras. Our method exploits that, during conversations, people tend to look at the person who is currently speaking. Harnessing the correlation between people's gaze and speaking behaviour therefore allows our method to automatically acquire training data during deployment and adaptively train eye contact detectors for each target user. We empirically evaluate the performance of our method on a recent dataset of natural group interactions and demonstrate that it achieves a relative improvement over the state-of-the-art method of more than 60%, and also improves over a head pose based baseline.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Eye contact is one of the most important non-verbal social cues and fundamental to human interactions. However, detecting eye contact without specialized eye tracking equipment poses significant challenges, particularly for multiple people in real-world settings. We present a novel method to robustly detect eye contact in natural three- and four-person interactions using off-the-shelf ambient cameras. Our method exploits that, during conversations, people tend to look at the person who is currently speaking. Harnessing the correlation between people's gaze and speaking behaviour therefore allows our method to automatically acquire training data during deployment and adaptively train eye contact detectors for each target user. We empirically evaluate the performance of our method on a recent dataset of natural group interactions and demonstrate that it achieves a relative improvement over the state-of-the-art method of more than 60%, and also improves over a head pose based baseline. |
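A minimal sketch of the weak-labelling idea described above: frames in which a given person is the active speaker are used as automatically acquired positives for "looking at that person". The two-dimensional head-pose features, the logistic-regression classifier, and all sample values are illustrative assumptions, not the paper's actual features or model.

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

def weak_labels(features, speaker_ids, target_id):
    """Weakly label each frame of one listener: frames in which `target_id` is
    the active speaker are treated as 'looking at target' positives, frames
    with a different active speaker as negatives (assumption: people tend to
    look at the current speaker)."""
    X, y = [], []
    for feat, speaker in zip(features, speaker_ids):
        if speaker is None:            # nobody speaking: no reliable label
            continue
        X.append(feat)
        y.append(1 if speaker == target_id else 0)
    return np.array(X), np.array(y)

# Hypothetical per-frame head-pose features of one listener (yaw, pitch) and
# the diarised active-speaker id for the same frames.
feats = np.array([[0.31, -0.05], [0.29, -0.02], [-0.42, 0.01],
                  [-0.40, 0.04], [0.33, -0.04], [-0.44, 0.02]])
speakers = ["B", "B", "C", "C", "B", "C"]

X, y = weak_labels(feats, speakers, target_id="B")
clf = LogisticRegression().fit(X, y)      # adaptively trained detector for "B"
print(clf.predict([[0.30, -0.03]]))       # expected: [1], i.e. eye contact with B
```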
![]() | Seonwook Park; Xucong Zhang; Andreas Bulling; Otmar Hilliges Learning to Find Eye Region Landmarks for Remote Gaze Estimation in Unconstrained Settings Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 21:1-21:10, 2018, (best presentation award). @inproceedings{park18_etra, title = {Learning to Find Eye Region Landmarks for Remote Gaze Estimation in Unconstrained Settings}, author = {Seonwook Park and Xucong Zhang and Andreas Bulling and Otmar Hilliges}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/park18_etra.pdf https://youtu.be/I8WlEHgDBV4}, doi = {10.1145/3204493.3204545}, year = {2018}, date = {2018-03-27}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {21:1-21:10}, abstract = {Conventional feature-based and model-based gaze estimation methods have proven to perform well in settings with controlled illumination and specialized cameras. In unconstrained real-world settings, however, such methods are surpassed by recent appearance-based methods due to difficulties in modeling factors such as illumination changes and other visual artifacts. We present a novel learning-based method for eye region landmark localization that enables conventional methods to be competitive to latest appearance-based methods. Despite having been trained exclusively on synthetic data, our method exceeds the state of the art for iris localization and eye shape registration on real-world imagery. We then use the detected landmarks as input to iterative model-fitting and lightweight learning-based gaze estimation methods. Our approach outperforms existing model-fitting and appearance-based methods in the context of person-independent and personalized gaze estimation.}, note = {best presentation award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Conventional feature-based and model-based gaze estimation methods have proven to perform well in settings with controlled illumination and specialized cameras. In unconstrained real-world settings, however, such methods are surpassed by recent appearance-based methods due to difficulties in modeling factors such as illumination changes and other visual artifacts. We present a novel learning-based method for eye region landmark localization that enables conventional methods to be competitive to latest appearance-based methods. Despite having been trained exclusively on synthetic data, our method exceeds the state of the art for iris localization and eye shape registration on real-world imagery. We then use the detected landmarks as input to iterative model-fitting and lightweight learning-based gaze estimation methods. Our approach outperforms existing model-fitting and appearance-based methods in the context of person-independent and personalized gaze estimation. |
![]() | Thomas Mattusch; Mahsa Mirzamohammad; Mohamed Khamis; Andreas Bulling; Florian Alt Hidden Pursuits: Evaluating Gaze-selection via Pursuits when the Stimulus Trajectory is Partially Hidden Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 27:1-27:5, 2018. @inproceedings{mattusch18_etra, title = {Hidden Pursuits: Evaluating Gaze-selection via Pursuits when the Stimulus Trajectory is Partially Hidden}, author = {Thomas Mattusch and Mahsa Mirzamohammad and Mohamed Khamis and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/mattusch18_etra.pdf}, doi = {10.1145/3204493.3204569}, year = {2018}, date = {2018-03-27}, booktitle = {Proc. International Symposium on Eye Tracking Research and Applications (ETRA)}, pages = {27:1-27:5}, abstract = {The idea behind gaze interaction using Pursuits is to leverage the human's smooth pursuit eye movements performed when following moving targets. However, humans can also anticipate where a moving target would reappear if it temporarily hides from their view. In this work, we investigate how well users can select targets using Pursuits in cases where the target's trajectory is partially invisible (HiddenPursuits): e.g., can users select a moving target that temporarily hides behind another object? Although HiddenPursuits was not studied in the context of interaction before, understanding how well users can perform HiddenPursuits presents numerous opportunities, particularly for small interfaces where a target's trajectory can cover area outside of the screen. We found that users can still select targets quickly via Pursuits even if their trajectory is up to 50% hidden, and at the expense of longer selection times when the hidden portion is larger. We discuss how gaze-based interfaces can leverage HiddenPursuits for an improved user experience.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The idea behind gaze interaction using Pursuits is to leverage the human's smooth pursuit eye movements performed when following moving targets. However, humans can also anticipate where a moving target would reappear if it temporarily hides from their view. In this work, we investigate how well users can select targets using Pursuits in cases where the target's trajectory is partially invisible (HiddenPursuits): e.g., can users select a moving target that temporarily hides behind another object? Although HiddenPursuits was not studied in the context of interaction before, understanding how well users can perform HiddenPursuits presents numerous opportunities, particularly for small interfaces where a target's trajectory can cover area outside of the screen. We found that users can still select targets quickly via Pursuits even if their trajectory is up to 50% hidden, and at the expense of longer selection times when the hidden portion is larger. We discuss how gaze-based interfaces can leverage HiddenPursuits for an improved user experience. |
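Pursuits-style selection, the mechanism underlying the entry above, is commonly implemented by correlating the gaze trajectory with each stimulus trajectory over a sliding window and selecting the best-matching target above a threshold, which is what makes it calibration-free. The sketch below shows this general mechanism rather than the paper's specific study setup; the 0.8 threshold and the circular stimuli are assumptions.

```python
import numpy as np

def pursuits_correlation(gaze, target):
    """Mean of the Pearson correlations between gaze and target trajectories
    on the x and y axes over the same time window (Pursuits-style matching)."""
    cx = np.corrcoef(gaze[:, 0], target[:, 0])[0, 1]
    cy = np.corrcoef(gaze[:, 1], target[:, 1])[0, 1]
    return (cx + cy) / 2.0

def select_target(gaze_window, target_windows, threshold=0.8):
    """Return the id of the moving target whose trajectory correlates best with
    the gaze trajectory, if the correlation exceeds an assumed threshold."""
    scores = {tid: pursuits_correlation(gaze_window, tw)
              for tid, tw in target_windows.items()}
    best = max(scores, key=scores.get)
    return (best, scores) if scores[best] >= threshold else (None, scores)

# Toy example: two on-screen stimuli moving on circles with opposite phase;
# the gaze follows target "a" with a bit of noise.
t = np.linspace(0, 2 * np.pi, 60)
target_a = np.stack([np.cos(t), np.sin(t)], axis=1)
target_b = np.stack([np.cos(t + np.pi), np.sin(t + np.pi)], axis=1)
gaze = target_a + np.random.default_rng(0).normal(0, 0.05, target_a.shape)

print(select_target(gaze, {"a": target_a, "b": target_b})[0])  # -> "a"
```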
![]() | Mohamed Khamis; Carl Oechsner; Florian Alt; Andreas Bulling VRPursuits: Interaction in Virtual Reality using Smooth Pursuit Eye Movements Inproceedings Proc. International Conference on Advanced Visual Interfaces (AVI), pp. 18:1-18:8, 2018. @inproceedings{khamis18_avi, title = {VRPursuits: Interaction in Virtual Reality using Smooth Pursuit Eye Movements}, author = {Mohamed Khamis and Carl Oechsner and Florian Alt and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/04/khamis18_avi.pdf}, doi = {10.1145/3206505.3206522}, year = {2018}, date = {2018-03-26}, booktitle = {Proc. International Conference on Advanced Visual Interfaces (AVI)}, pages = {18:1-18:8}, abstract = {Gaze-based interaction using smooth pursuit eye movements (Pursuits) is attractive given that it is intuitive and overcomes the Midas touch problem. At the same time, eye tracking is becoming increasingly popular for VR applications. While Pursuits was shown to be effective in several interaction contexts, it was never explored in-depth for VR before. In a user study (N=26), we investigated how parameters that are specific to VR settings influence the performance of Pursuits. We found that Pursuits is robust against different sizes of virtual 3D targets and distances to them. However, Pursuits' performance improves when the trajectory size is larger, particularly if the user is walking while interacting. While walking, selecting moving targets via Pursuits is generally feasible albeit less accurate than when stationary. Finally, we discuss the implications of these findings and the potential of smooth pursuits for interaction in VR by demonstrating two sample use cases: 1) gaze-based authentication in VR, and 2) a space meteors shooting game.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze-based interaction using smooth pursuit eye movements (Pursuits) is attractive given that it is intuitive and overcomes the Midas touch problem. At the same time, eye tracking is becoming increasingly popular for VR applications. While Pursuits was shown to be effective in several interaction contexts, it was never explored in-depth for VR before. In a user study (N=26), we investigated how parameters that are specific to VR settings influence the performance of Pursuits. We found that Pursuits is robust against different sizes of virtual 3D targets and distances to them. However, Pursuits' performance improves when the trajectory size is larger, particularly if the user is walking while interacting. While walking, selecting moving targets via Pursuits is generally feasible albeit less accurate than when stationary. Finally, we discuss the implications of these findings and the potential of smooth pursuits for interaction in VR by demonstrating two sample use cases: 1) gaze-based authentication in VR, and 2) a space meteors shooting game. |
![]() | Philipp Müller; Michael Xuelin Huang; Andreas Bulling Detecting Low Rapport During Natural Interactions in Small Groups from Non-Verbal Behaviour Inproceedings Proc. ACM International Conference on Intelligent User Interfaces (IUI), pp. 153-164, 2018. @inproceedings{mueller18_iui, title = {Detecting Low Rapport During Natural Interactions in Small Groups from Non-Verbal Behaviour}, author = {Philipp Müller and Michael Xuelin Huang and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/01/mueller2018_iui.pdf}, doi = {10.1145/3172944.3172969}, year = {2018}, date = {2018-03-07}, booktitle = {Proc. ACM International Conference on Intelligent User Interfaces (IUI)}, pages = {153-164}, abstract = {Rapport, the close and harmonious relationship in which interaction partners are "in sync" with each other, was shown to result in smoother social interactions, improved collaboration, and improved interpersonal outcomes. In this work, we are first to investigate automatic prediction of low rapport during natural interactions within small groups. This task is challenging given that rapport only manifests in subtle non-verbal signals that are, in addition, subject to influences of group dynamics as well as inter-personal idiosyncrasies. We record videos of unscripted discussions of three to four people using a multi-view camera system and microphones. We analyse a rich set of non-verbal signals for rapport detection, namely facial expressions, hand motion, gaze, speaker turns, and speech prosody. Using facial features, we can detect low rapport with an average precision of 0.7 (chance level at 0.25), while incorporating prior knowledge of participants' personalities can even achieve early prediction without a drop in performance. We further provide a detailed analysis of different feature sets and the amount of information contained in different temporal segments of the interactions.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Rapport, the close and harmonious relationship in which interaction partners are "in sync" with each other, was shown to result in smoother social interactions, improved collaboration, and improved interpersonal outcomes. In this work, we are first to investigate automatic prediction of low rapport during natural interactions within small groups. This task is challenging given that rapport only manifests in subtle non-verbal signals that are, in addition, subject to influences of group dynamics as well as inter-personal idiosyncrasies. We record videos of unscripted discussions of three to four people using a multi-view camera system and microphones. We analyse a rich set of non-verbal signals for rapport detection, namely facial expressions, hand motion, gaze, speaker turns, and speech prosody. Using facial features, we can detect low rapport with an average precision of 0.7 (chance level at 0.25), while incorporating prior knowledge of participants' personalities can even achieve early prediction without a drop in performance. We further provide a detailed analysis of different feature sets and the amount of information contained in different temporal segments of the interactions. |
![]() | Mohamed Khamis; Anita Baier; Niels Henze; Florian Alt; Andreas Bulling Understanding Face and Eye Visibility in Front-Facing Cameras of Smartphones used in the Wild Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 280:1-280:12, 2018. @inproceedings{khamis18a_chi, title = {Understanding Face and Eye Visibility in Front-Facing Cameras of Smartphones used in the Wild}, author = {Mohamed Khamis and Anita Baier and Niels Henze and Florian Alt and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/01/khamis18a_chi.pdf https://www.youtube.com/watch?v=_L6FyzTjFG0}, doi = {10.1145/3173574.3173854}, year = {2018}, date = {2018-01-01}, booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, journal = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {280:1-280:12}, abstract = {Commodity mobile devices are now equipped with high-resolution front-facing cameras, paving the way for applications in biometrics, facial expression analysis, or gaze interaction. However, it is unknown how often users hold devices in a way that allows capturing their face or eyes, and how this impacts detection accuracy. We collected 25,726 in-the-wild photos taken from the front-facing camera of smartphones and associated application usage logs. We found that the full face is visible about 29% of the time, and that in most cases the face is only partially visible. We further identified an influence of users' current activity; for example, when watching videos, the eyes but not the entire face are visible 75% of the time in our dataset. We found that state-of-the-art face detection algorithms perform poorly against photos taken from front-facing cameras. We discuss how these findings impact mobile applications that leverage face and eye detection, and derive practical implications to address state-of-the-art's limitations.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Commodity mobile devices are now equipped with high-resolution front-facing cameras, paving the way for applications in biometrics, facial expression analysis, or gaze interaction. However, it is unknown how often users hold devices in a way that allows capturing their face or eyes, and how this impacts detection accuracy. We collected 25,726 in-the-wild photos taken from the front-facing camera of smartphones and associated application usage logs. We found that the full face is visible about 29% of the time, and that in most cases the face is only partially visible. We further identified an influence of users' current activity; for example, when watching videos, the eyes but not the entire face are visible 75% of the time in our dataset. We found that state-of-the-art face detection algorithms perform poorly against photos taken from front-facing cameras. We discuss how these findings impact mobile applications that leverage face and eye detection, and derive practical implications to address state-of-the-art's limitations. |
![]() | Mohamed Khamis; Christian Becker; Andreas Bulling; Florian Alt Which one is me? Identifying Oneself on Public Displays Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 287:1-287:12, 2018, (best paper honourable mention award). @inproceedings{khamis18b_chi, title = {Which one is me? Identifying Oneself on Public Displays}, author = {Mohamed Khamis and Christian Becker and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/01/khamis18b_chi.pdf https://www.youtube.com/watch?v=yG5_RBrnRx0}, doi = {10.1145/3173574.3173861}, year = {2018}, date = {2018-01-01}, booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, journal = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {287:1-287:12}, abstract = {While user representations are extensively used on public displays, it remains unclear how well users can recognize their own representation among those of surrounding users. We study the most widely used representations: abstract objects, skeletons, silhouettes and mirrors. In a prestudy (N=12), we identify five strategies that users follow to recognize themselves on public displays. In a second study (N=19), we quantify the users' recognition time and accuracy with respect to each representation type. Our findings suggest that there is a significant effect of (1) the representation type, (2) the strategies performed by users, and (3) the combination of both on recognition time and accuracy. We discuss the suitability of each representation for different settings and provide specific recommendations as to how user representations should be applied in multi-user scenarios. These recommendations guide practitioners and researchers in selecting the representation that optimizes the most for the deployment's requirements, and for the user strategies that are feasible in that environment.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } While user representations are extensively used on public displays, it remains unclear how well users can recognize their own representation among those of surrounding users. We study the most widely used representations: abstract objects, skeletons, silhouettes and mirrors. In a prestudy (N=12), we identify five strategies that users follow to recognize themselves on public displays. In a second study (N=19), we quantify the users' recognition time and accuracy with respect to each representation type. Our findings suggest that there is a significant effect of (1) the representation type, (2) the strategies performed by users, and (3) the combination of both on recognition time and accuracy. We discuss the suitability of each representation for different settings and provide specific recommendations as to how user representations should be applied in multi-user scenarios. These recommendations guide practitioners and researchers in selecting the representation that optimizes the most for the deployment's requirements, and for the user strategies that are feasible in that environment. |
![]() | Xucong Zhang; Michael Xuelin Huang; Yusuke Sugano; Andreas Bulling Training Person-Specific Gaze Estimators from Interactions with Multiple Devices Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 624:1-624:12, 2018. @inproceedings{zhang18_chi, title = {Training Person-Specific Gaze Estimators from Interactions with Multiple Devices}, author = {Xucong Zhang and Michael Xuelin Huang and Yusuke Sugano and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/02/zhang18_chi.pdf}, doi = {10.1145/3173574.3174198}, year = {2018}, date = {2018-01-01}, booktitle = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, journal = {Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {624:1-624:12}, abstract = {Learning-based gaze estimation has significant potential to enable attentive user interfaces and gaze-based interaction on the billions of camera-equipped handheld devices and ambient displays. While training accurate person- and device-independent gaze estimators remains challenging, person-specific training is feasible but requires tedious data collection for each target device. To address these limitations, we present the first method to train person-specific gaze estimators across multiple devices. At the core of our method is a single convolutional neural network with shared feature extraction layers and device-specific branches that we train from face images and corresponding on-screen gaze locations. Detailed evaluations on a new dataset of interactions with five common devices (mobile phone, tablet, laptop, desktop computer, smart TV) and three common applications (mobile game, text editing, media center) demonstrate the significant potential of cross-device training. We further explore training with gaze locations derived from natural interactions, such as mouse or touch input.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Learning-based gaze estimation has significant potential to enable attentive user interfaces and gaze-based interaction on the billions of camera-equipped handheld devices and ambient displays. While training accurate person- and device-independent gaze estimators remains challenging, person-specific training is feasible but requires tedious data collection for each target device. To address these limitations, we present the first method to train person-specific gaze estimators across multiple devices. At the core of our method is a single convolutional neural network with shared feature extraction layers and device-specific branches that we train from face images and corresponding on-screen gaze locations. Detailed evaluations on a new dataset of interactions with five common devices (mobile phone, tablet, laptop, desktop computer, smart TV) and three common applications (mobile game, text editing, media center) demonstrate the significant potential of cross-device training. We further explore training with gaze locations derived from natural interactions, such as mouse or touch input. |
![]() | Arif Khan; Ingmar Steiner; Yusuke Sugano; Andreas Bulling; Ross Macdonald A Multimodal Corpus of Expert Gaze and Behavior during Phonetic Segmentation Tasks Inproceedings Proc. Language Resources and Evaluation Conference (LREC), 2018. @inproceedings{khan18_lrec, title = {A Multimodal Corpus of Expert Gaze and Behavior during Phonetic Segmentation Tasks}, author = {Arif Khan and Ingmar Steiner and Yusuke Sugano and Andreas Bulling and Ross Macdonald}, url = {https://perceptual.mpi-inf.mpg.de/files/2018/05/khan18_lrec.pdf https://git.io/eyeseg-data}, year = {2018}, date = {2018-01-01}, booktitle = {Proc. Language Resources and Evaluation Conference (LREC)}, abstract = {Phonetic segmentation is the process of splitting speech into distinct phonetic units. Human experts routinely perform this task manually by analyzing auditory and visual cues using analysis software, which is an extremely time-consuming process. Methods exist for automatic segmentation, but these are not always accurate enough. In order to improve automatic segmentation, we need to model it as close to the manual segmentation as possible. This corpus is an effort to capture the human segmentation behavior by recording experts performing a segmentation task. We believe that this data will enable us to highlight the important aspects of manual segmentation, which can be used in automatic segmentation to improve its accuracy.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Phonetic segmentation is the process of splitting speech into distinct phonetic units. Human experts routinely perform this task manually by analyzing auditory and visual cues using analysis software, which is an extremely time-consuming process. Methods exist for automatic segmentation, but these are not always accurate enough. In order to improve automatic segmentation, we need to model it as close to the manual segmentation as possible. This corpus is an effort to capture the human segmentation behavior by recording experts performing a segmentation task. We believe that this data will enable us to highlight the important aspects of manual segmentation, which can be used in automatic segmentation to improve its accuracy. |
![]() | Mohamed Khamis; Linda Bandelow; Stina Schick; Dario Casadevall; Andreas Bulling; Florian Alt They are all after you: Investigating the Viability of a Threat Model that involves Multiple Shoulder Surfers Inproceedings Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 31-35, 2017, (best paper honourable mention award). @inproceedings{khamis17_mum, title = {They are all after you: Investigating the Viability of a Threat Model that involves Multiple Shoulder Surfers}, author = {Mohamed Khamis and Linda Bandelow and Stina Schick and Dario Casadevall and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/10/khamis17_mum.pdf}, doi = {10.1145/3152832.3152851}, year = {2017}, date = {2017-10-09}, booktitle = {Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM)}, pages = {31-35}, abstract = {Many of the authentication schemes for mobile devices that were proposed lately complicate shoulder surfing by splitting the attacker’s attention into two or more entities. For example, multimodal authentication schemes such as GazeTouchPIN and GazeTouchPass require attackers to observe the user’s gaze input and the touch input performed on the phone’s screen. These schemes have always been evaluated against single observers, while multiple observers could potentially attack these schemes with greater ease, since each of them can focus exclusively on one part of the password. In this work, we study the effectiveness of a novel threat model against authentication schemes that split the attacker’s attention. As a case study, we report on a security evaluation of two state of the art authentication schemes in the case of a team of two observers. Our results show that although multiple observers perform better against these schemes than single observers, multimodal schemes are significantly more secure against multiple observers compared to schemes that employ a single modality. We discuss how this threat model impacts the design of authentication schemes.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Many of the authentication schemes for mobile devices that were proposed lately complicate shoulder surfing by splitting the attacker’s attention into two or more entities. For example, multimodal authentication schemes such as GazeTouchPIN and GazeTouchPass require attackers to observe the user’s gaze input and the touch input performed on the phone’s screen. These schemes have always been evaluated against single observers, while multiple observers could potentially attack these schemes with greater ease, since each of them can focus exclusively on one part of the password. In this work, we study the effectiveness of a novel threat model against authentication schemes that split the attacker’s attention. As a case study, we report on a security evaluation of two state of the art authentication schemes in the case of a team of two observers. Our results show that although multiple observers perform better against these schemes than single observers, multimodal schemes are significantly more secure against multiple observers compared to schemes that employ a single modality. We discuss how this threat model impacts the design of authentication schemes. |
![]() | Christian Lander; Sven Gehring; Markus Löchtefeld; Andreas Bulling; Antonio Krüger EyeMirror: Mobile Calibration-Free Gaze Approximation using Corneal Imaging Inproceedings Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 279-291, 2017. @inproceedings{lander17_mum, title = {EyeMirror: Mobile Calibration-Free Gaze Approximation using Corneal Imaging}, author = {Christian Lander and Sven Gehring and Markus Löchtefeld and Andreas Bulling and Antonio Krüger}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/11/lander17_mum.pdf}, doi = {10.1145/3152832.3152839}, year = {2017}, date = {2017-10-09}, booktitle = {Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM)}, pages = {279-291}, abstract = {Gaze is a powerful measure of what attracts people’s attention and reveals where we are looking within our current field of view. Hence, gaze-based interfaces are gaining in importance. However, gaze estimation usually requires extensive hardware and depends on a calibration that has to be renewed regularly. We present EyeMirror, a mobile device for calibration-free gaze approximation on surfaces (e.g. displays). It consists of a head-mounted camera, connected to a wearable mini-computer, capturing the environment reflected on the human cornea. The corneal images are analyzed using natural feature tracking for gaze estimation on surfaces. In two lab studies we compared variations of EyeMirror against established methods for gaze estimation in a display scenario, and investigated the effect of display content (i.e. number of features). EyeMirror achieved 4.03° gaze estimation error, while we found no significant effect of display content.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze is a powerful measure of what attracts people’s attention and reveals where we are looking within our current field of view. Hence, gaze-based interfaces are gaining in importance. However, gaze estimation usually requires extensive hardware and depends on a calibration that has to be renewed regularly. We present EyeMirror, a mobile device for calibration-free gaze approximation on surfaces (e.g. displays). It consists of a head-mounted camera, connected to a wearable mini-computer, capturing the environment reflected on the human cornea. The corneal images are analyzed using natural feature tracking for gaze estimation on surfaces. In two lab studies we compared variations of EyeMirror against established methods for gaze estimation in a display scenario, and investigated the effect of display content (i.e. number of features). EyeMirror achieved 4.03° gaze estimation error, while we found no significant effect of display content. |
![]() | Mohamed Khamis; Mariam Hassib; Emanuel von Zezschwitz; Andreas Bulling; Florian Alt GazeTouchPIN: Protecting Sensitive Data on Mobile Devices using Secure Multimodal Authentication Inproceedings Proc. of the 19th ACM International Conference on Multimodal Interaction (ICMI), pp. 446-450, 2017. @inproceedings{khamis17_icmi, title = {GazeTouchPIN: Protecting Sensitive Data on Mobile Devices using Secure Multimodal Authentication}, author = {Mohamed Khamis and Mariam Hassib and Emanuel von Zezschwitz and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/11/khamis17_icmi.pdf https://www.youtube.com/watch?v=gs2YO0gP4kI}, doi = {10.1145/3136755.3136809}, year = {2017}, date = {2017-08-31}, booktitle = {Proc. of the 19th ACM International Conference on Multimodal Interaction (ICMI)}, pages = {446-450}, abstract = {Although mobile devices provide access to a plethora of sensitive data, most users still only protect them with PINs or patterns, which are vulnerable to side-channel attacks (e.g., shoulder surfing). However, prior research has shown that privacy-aware users are willing to take further steps to protect their private data. We propose GazeTouchPIN, a novel secure authentication scheme for mobile devices that combines gaze and touch input. Our multimodal approach complicates shoulder-surfing attacks by requiring attackers to observe the screen as well as the user’s eyes to find the password. We evaluate the security and usability of GazeTouchPIN in two user studies (N=30). We found that while GazeTouchPIN requires longer entry times, privacy aware users would use it on-demand when feeling observed or when accessing sensitive data. The results show that successful shoulder surfing attack rate drops from 68% to 10.4% when using GazeTouchPIN.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Although mobile devices provide access to a plethora of sensitive data, most users still only protect them with PINs or patterns, which are vulnerable to side-channel attacks (e.g., shoulder surfing). However, prior research has shown that privacy-aware users are willing to take further steps to protect their private data. We propose GazeTouchPIN, a novel secure authentication scheme for mobile devices that combines gaze and touch input. Our multimodal approach complicates shoulder-surfing attacks by requiring attackers to observe the screen as well as the user’s eyes to find the password. We evaluate the security and usability of GazeTouchPIN in two user studies (N=30). We found that while GazeTouchPIN requires longer entry times, privacy aware users would use it on-demand when feeling observed or when accessing sensitive data. The results show that successful shoulder surfing attack rate drops from 68% to 10.4% when using GazeTouchPIN. |
![]() | Hosnieh Sattar; Andreas Bulling; Mario Fritz Predicting the Category and Attributes of Visual Search Targets Using Deep Gaze Pooling Inproceedings Proc. of the IEEE International Conference on Computer Vision Workshops (ICCVW), pp. 2740-2748, 2017. @inproceedings{sattar17_iccvw, title = {Predicting the Category and Attributes of Visual Search Targets Using Deep Gaze Pooling}, author = {Hosnieh Sattar and Andreas Bulling and Mario Fritz}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/08/sattar17_iccvw.pdf}, doi = {10.1109/ICCVW.2017.322}, year = {2017}, date = {2017-08-23}, booktitle = {Proc. of the IEEE International Conference on Computer Vision Workshops (ICCVW)}, pages = {2740-2748}, abstract = {Predicting the target of visual search from eye fixation (gaze) data is a challenging problem with many applications in human-computer interaction. In contrast to previous work that has focused on individual instances as search target, we propose the first approach to predict categories and attributes of search targets based on gaze data. However, state of the art models for categorical recognition in general require large amounts of training data, which is prohibitive for gaze data. To address this challenge, we propose a novel Gaze Pooling Layer that integrates gaze information into CNN-based architectures as an attention mechanism - incorporating both spatial and temporal aspects of human gaze behavior. We show that our approach is effective even when the gaze pooling layer is added to an already trained CNN, thus eliminating the need for expensive joint data collection of visual and gaze data. We propose an experimental setup and data set and demonstrate the effectiveness of our method for search target prediction based on gaze behavior. We further study how to integrate temporal and spatial gaze information most effectively, and indicate directions for future research in gaze-based prediction of mental states.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Predicting the target of visual search from eye fixation (gaze) data is a challenging problem with many applications in human-computer interaction. In contrast to previous work that has focused on individual instances as search target, we propose the first approach to predict categories and attributes of search targets based on gaze data. However, state of the art models for categorical recognition in general require large amounts of training data, which is prohibitive for gaze data. To address this challenge, we propose a novel Gaze Pooling Layer that integrates gaze information into CNN-based architectures as an attention mechanism - incorporating both spatial and temporal aspects of human gaze behavior. We show that our approach is effective even when the gaze pooling layer is added to an already trained CNN, thus eliminating the need for expensive joint data collection of visual and gaze data. We propose an experimental setup and data set and demonstrate the effectiveness of our method for search target prediction based on gaze behavior. We further study how to integrate temporal and spatial gaze information most effectively, and indicate directions for future research in gaze-based prediction of mental states. |
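The Gaze Pooling Layer described above integrates a fixation density map into a CNN as a spatial attention mechanism. As a rough illustration of that idea only (the function name, normalisation, and pooling choice below are assumptions, not the authors' implementation), a gaze heatmap can be used to weight pre-computed feature maps before spatial pooling:

```python
import numpy as np

def gaze_pooling(feature_maps, gaze_heatmap, eps=1e-8):
    """Illustrative gaze-weighted pooling (assumed form, not the authors' code).

    feature_maps : (C, H, W) activations from a pre-trained CNN layer.
    gaze_heatmap : (H, W) fixation density map aligned with the feature grid.
    Returns a C-dimensional descriptor that emphasises fixated regions.
    """
    w = gaze_heatmap / (gaze_heatmap.sum() + eps)            # normalise to a spatial attention map
    return (feature_maps * w[None, :, :]).sum(axis=(1, 2))   # attention-weighted spatial pooling

# toy usage: 512 feature channels on a 7x7 grid, fixations concentrated in one cell
feats = np.random.rand(512, 7, 7)
heat = np.zeros((7, 7))
heat[3, 4] = 1.0
descriptor = gaze_pooling(feats, heat)   # shape (512,)
```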
![]() | Mohamed Khamis; Regina Hasholzner; Andreas Bulling; Florian Alt GTmoPass: Two-factor Authentication on Public Displays Using GazeTouch passwords and Personal Mobile Devices Inproceedings Proc. of the ACM International Symposium on Pervasive Displays (PerDis), pp. 8:1-8:9, 2017. @inproceedings{khamis2017_perdis, title = {GTmoPass: Two-factor Authentication on Public Displays Using GazeTouch passwords and Personal Mobile Devices}, author = {Mohamed Khamis and Regina Hasholzner and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/08/khamis2017_perdis.pdf}, doi = {10.1145/3078810.3078815}, year = {2017}, date = {2017-08-16}, booktitle = {Proc. of the ACM International Symposium on Pervasive Displays (PerDis)}, pages = {8:1-8:9}, abstract = {As public displays continue to deliver increasingly private and personalized content, there is a need to ensure that only the legitimate users can access private information in sensitive contexts. While public displays can adopt similar authentication concepts like those used on public terminals (e.g., ATMs), authentication in public is subject to a number of risks. Namely, adversaries can uncover a user’s password through (1) shoulder surfing, (2) thermal attacks, or (3) smudge attacks. To address this problem we propose GTmoPass, an authentication architecture that enables multi-factor user authentication on public displays. The first factor is a knowledge-factor: we employ a shoulder-surfing resilient multimodal scheme that combines gaze and touch input for password entry. The second factor is a possession-factor: users utilize their personal mobile devices, on which they enter the password. Credentials are securely transmitted to a server via Bluetooth beacons. We describe the implementation of GTmoPass and report on an evaluation of its usability and security, which shows that although authentication using GTmoPass is slightly slower than traditional methods, it protects against the three aforementioned threats.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } As public displays continue to deliver increasingly private and personalized content, there is a need to ensure that only the legitimate users can access private information in sensitive contexts. While public displays can adopt similar authentication concepts like those used on public terminals (e.g., ATMs), authentication in public is subject to a number of risks. Namely, adversaries can uncover a user’s password through (1) shoulder surfing, (2) thermal attacks, or (3) smudge attacks. To address this problem we propose GTmoPass, an authentication architecture that enables multi-factor user authentication on public displays. The first factor is a knowledge-factor: we employ a shoulder-surfing resilient multimodal scheme that combines gaze and touch input for password entry. The second factor is a possession-factor: users utilize their personal mobile devices, on which they enter the password. Credentials are securely transmitted to a server via Bluetooth beacons. We describe the implementation of GTmoPass and report on an evaluation of its usability and security, which shows that although authentication using GTmoPass is slightly slower than traditional methods, it protects against the three aforementioned threats. |
![]() | Xucong Zhang; Yusuke Sugano; Andreas Bulling Everyday Eye Contact Detection Using Unsupervised Gaze Target Discovery Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 193-203, 2017, (best paper honourable mention award). @inproceedings{zhang17_uist, title = {Everyday Eye Contact Detection Using Unsupervised Gaze Target Discovery}, author = {Xucong Zhang and Yusuke Sugano and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/05/zhang17_uist.pdf https://www.youtube.com/watch?v=ccrS5XuhQpk https://www.youtube.com/watch?v=AxDHU40Xda8 http://www.techbriefs.com/component/content/article/1198-tb/news/news/27400-new-software-spots-eye-contact}, doi = {10.1145/3126594.3126614}, year = {2017}, date = {2017-06-26}, booktitle = {Proc. of the ACM Symposium on User Interface Software and Technology (UIST)}, pages = {193-203}, abstract = {Eye contact is an important non-verbal cue in social signal processing and promising as a measure of overt attention in human-object interactions and attentive user interfaces. However, robust detection of eye contact across different users, gaze targets, camera positions, and illumination conditions is notoriously challenging. We present a novel method for eye contact detection that combines a state-of-the-art appearance-based gaze estimator with a novel approach for unsupervised gaze target discovery, i.e. without the need for tedious and time-consuming manual data annotation. We evaluate our method in two real-world scenarios: detecting eye contact at the workplace, including on the main work display, from cameras mounted to target objects, as well as during everyday social interactions with the wearer of a head-mounted egocentric camera. We empirically evaluate the performance of our method in both scenarios and demonstrate its effectiveness for detecting eye contact independent of target object type and size, camera position, and user and recording environment.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Eye contact is an important non-verbal cue in social signal processing and promising as a measure of overt attention in human-object interactions and attentive user interfaces. However, robust detection of eye contact across different users, gaze targets, camera positions, and illumination conditions is notoriously challenging. We present a novel method for eye contact detection that combines a state-of-the-art appearance-based gaze estimator with a novel approach for unsupervised gaze target discovery, i.e. without the need for tedious and time-consuming manual data annotation. We evaluate our method in two real-world scenarios: detecting eye contact at the workplace, including on the main work display, from cameras mounted to target objects, as well as during everyday social interactions with the wearer of a head-mounted egocentric camera. We empirically evaluate the performance of our method in both scenarios and demonstrate its effectiveness for detecting eye contact independent of target object type and size, camera position, and user and recording environment. |
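The unsupervised gaze target discovery step above can be pictured as finding the dominant cluster of estimated gaze directions and labelling frames near that cluster as eye contact. The sketch below is a deliberately simplified stand-in for that idea (the histogram-mode clustering and the angular threshold are my own assumptions, not the paper's method):

```python
import numpy as np

def detect_eye_contact(gaze_yaw_pitch, bins=30, threshold_deg=5.0):
    """Toy stand-in for unsupervised gaze target discovery (my simplification).

    gaze_yaw_pitch : (N, 2) estimated gaze directions in degrees (yaw, pitch).
    The densest histogram cell is taken as the putative gaze target (e.g. the
    camera mounted on the target object); frames close to it count as eye contact.
    """
    hist, yaw_edges, pitch_edges = np.histogram2d(
        gaze_yaw_pitch[:, 0], gaze_yaw_pitch[:, 1], bins=bins)
    iy, ip = np.unravel_index(np.argmax(hist), hist.shape)
    target = np.array([(yaw_edges[iy] + yaw_edges[iy + 1]) / 2,
                       (pitch_edges[ip] + pitch_edges[ip + 1]) / 2])
    dist = np.linalg.norm(gaze_yaw_pitch - target, axis=1)
    return dist < threshold_deg   # boolean per-frame eye-contact labels
```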
![]() | Mohamed Khamis; Axel Hoesl; Alexander Klimczak; Martin Reiss; Florian Alt; Andreas Bulling EyeScout: Active Eye Tracking for Position and Movement Independent Gaze Interaction with Large Public Displays Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 155-166, 2017. @inproceedings{khamis17_uist, title = {EyeScout: Active Eye Tracking for Position and Movement Independent Gaze Interaction with Large Public Displays}, author = {Mohamed Khamis and Axel Hoesl and Alexander Klimczak and Martin Reiss and Florian Alt and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/05/khamis17_uist.pdf https://www.youtube.com/watch?v=D1IprYwqToM https://www.youtube.com/watch?v=J7_OiRqsmdM}, doi = {10.1145/3126594.3126630}, year = {2017}, date = {2017-06-26}, booktitle = {Proc. of the ACM Symposium on User Interface Software and Technology (UIST)}, pages = {155-166}, abstract = {While gaze holds a lot of promise for hands-free interaction with public displays, remote eye trackers with their confined tracking box restrict users to a single stationary position in front of the display. We present EyeScout, an active eye tracking system that combines an eye tracker mounted on a rail system with a computational method to automatically detect and align the tracker with the user’s lateral movement. EyeScout addresses key limitations of current gaze-enabled large public displays by offering two novel gaze-interaction modes for a single user: In "Walk then Interact" the user can walk up to an arbitrary position in front of the display and interact, while in "Walk and Interact" the user can interact even while on the move. We report on a user study that shows that EyeScout is well perceived by users, extends a public display’s sweet spot into a sweet line, and reduces gaze interaction kick-off time to 3.5 seconds - a 62% improvement over state of the art solutions. We discuss sample applications that demonstrate how EyeScout can enable position and movement-independent gaze interaction with large public displays.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } While gaze holds a lot of promise for hands-free interaction with public displays, remote eye trackers with their confined tracking box restrict users to a single stationary position in front of the display. We present EyeScout, an active eye tracking system that combines an eye tracker mounted on a rail system with a computational method to automatically detect and align the tracker with the user’s lateral movement. EyeScout addresses key limitations of current gaze-enabled large public displays by offering two novel gaze-interaction modes for a single user: In "Walk then Interact" the user can walk up to an arbitrary position in front of the display and interact, while in "Walk and Interact" the user can interact even while on the move. We report on a user study that shows that EyeScout is well perceived by users, extends a public display’s sweet spot into a sweet line, and reduces gaze interaction kick-off time to 3.5 seconds - a 62% improvement over state of the art solutions. We discuss sample applications that demonstrate how EyeScout can enable position and movement-independent gaze interaction with large public displays. |
![]() | Xucong Zhang; Yusuke Sugano; Mario Fritz; Andreas Bulling It’s Written All Over Your Face: Full-Face Appearance-Based Gaze Estimation Inproceedings Proc. of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 2299-2308, 2017. @inproceedings{zhang17_cvprw, title = {It’s Written All Over Your Face: Full-Face Appearance-Based Gaze Estimation}, author = {Xucong Zhang and Yusuke Sugano and Mario Fritz and Andreas Bulling}, url = {https://wp.mpi-inf.mpg.de/perceptual/files/2017/11/zhang_cvprw2017-6.pdf https://perceptual.mpi-inf.mpg.de/research/datasets/#zhang17_cvprw}, doi = {10.1109/CVPRW.2017.284}, year = {2017}, date = {2017-05-18}, booktitle = {Proc. of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)}, pages = {2299-2308}, abstract = {Eye gaze is an important non-verbal cue for human affect analysis. Recent gaze estimation work indicated that information from the full face region can benefit performance. Pushing this idea further, we propose an appearance-based method that, in contrast to a long-standing line of work in computer vision, only takes the full face image as input. Our method encodes the face image using a convolutional neural network with spatial weights applied on the feature maps to flexibly suppress or enhance information in different facial regions. Through extensive evaluation, we show that our full-face method significantly outperforms the state of the art for both 2D and 3D gaze estimation, achieving improvements of up to 14.3% on MPIIGaze and 27.7% on EYEDIAP for person-independent 3D gaze estimation. We further show that this improvement is consistent across different illumination conditions and gaze directions and particularly pronounced for the most challenging extreme head poses.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Eye gaze is an important non-verbal cue for human affect analysis. Recent gaze estimation work indicated that information from the full face region can benefit performance. Pushing this idea further, we propose an appearance-based method that, in contrast to a long-standing line of work in computer vision, only takes the full face image as input. Our method encodes the face image using a convolutional neural network with spatial weights applied on the feature maps to flexibly suppress or enhance information in different facial regions. Through extensive evaluation, we show that our full-face method significantly outperforms the state of the art for both 2D and 3D gaze estimation, achieving improvements of up to 14.3% on MPIIGaze and 27.7% on EYEDIAP for person-independent 3D gaze estimation. We further show that this improvement is consistent across different illumination conditions and gaze directions and particularly pronounced for the most challenging extreme head poses. |
![]() | Michael Xuelin Huang; Jiajia Li; Grace Ngai; Hong Va Leong ScreenGlint: Practical, In-situ Gaze Estimation on Smartphones Inproceedings Proc. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 2546-2557, 2017. @inproceedings{Huang2017, title = {ScreenGlint: Practical, In-situ Gaze Estimation on Smartphones}, author = {Michael Xuelin Huang and Jiajia Li and Grace Ngai and Hong Va Leong}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/05/screenGlint-v9.pdf}, doi = {10.1145/3025453.3025794}, year = {2017}, date = {2017-05-09}, booktitle = {Proc. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {2546-2557}, abstract = {Gaze estimation has widespread applications. However, little work has explored gaze estimation on smartphones, even though they are fast becoming ubiquitous. This paper presents ScreenGlint, a novel approach which exploits the glint (reflection) of the screen on the user's cornea for gaze estimation, using only the image captured by the front-facing camera. We first conduct a user study on common postures during smartphone use. We then design an experiment to evaluate the accuracy of ScreenGlint under varying face-to-screen distances. An in-depth evaluation involving multiple users is conducted and the impact of head pose variations is investigated. ScreenGlint achieves an overall angular error of 2.44º without head pose variations, and 2.94º with head pose variations. Our technique compares favorably to state-of-the-art research works, indicating that the glint of the screen is an effective and practical cue to gaze estimation on the smartphone platform. We believe that this work can open up new possibilities for practical and ubiquitous gaze-aware applications.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze estimation has widespread applications. However, little work has explored gaze estimation on smartphones, even though they are fast becoming ubiquitous. This paper presents ScreenGlint, a novel approach which exploits the glint (reflection) of the screen on the user's cornea for gaze estimation, using only the image captured by the front-facing camera. We first conduct a user study on common postures during smartphone use. We then design an experiment to evaluate the accuracy of ScreenGlint under varying face-to-screen distances. An in-depth evaluation involving multiple users is conducted and the impact of head pose variations is investigated. ScreenGlint achieves an overall angular error of 2.44º without head pose variations, and 2.94º with head pose variations. Our technique compares favorably to state-of-the-art research works, indicating that the glint of the screen is an effective and practical cue to gaze estimation on the smartphone platform. We believe that this work can open up new possibilities for practical and ubiquitous gaze-aware applications. |
![]() | Nour Karessli; Zeynep Akata; Bernt Schiele; Andreas Bulling Gaze Embeddings for Zero-Shot Image Classification Inproceedings Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6412-6421, 2017. @inproceedings{karessli2017_cvpr, title = {Gaze Embeddings for Zero-Shot Image Classification}, author = {Nour Karessli and Zeynep Akata and Bernt Schiele and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/04/karessli17_cvpr.pdf}, doi = {10.1109/CVPR.2017.679}, year = {2017}, date = {2017-02-28}, booktitle = {Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, pages = {6412-6421}, abstract = {Zero-shot image classification using auxiliary information, such as attributes describing discriminative object properties, requires time-consuming annotation by domain experts. We instead propose a method that relies on human gaze as auxiliary information, exploiting that even non-expert users have a natural ability to judge class membership. We present a data collection paradigm that involves a discrimination task to increase the information content obtained from gaze data. Our method extracts discriminative descriptors from the data and learns a compatibility function between image and gaze using three novel gaze embeddings: Gaze Histograms (GH), Gaze Features with Grid (GFG) and Gaze Features with Sequence (GFS). We introduce two new gaze-annotated datasets for fine-grained image classification and show that human gaze data is indeed class discriminative, provides a competitive alternative to expert-annotated attributes, and outperforms other baselines for zero-shot image classification.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Zero-shot image classification using auxiliary information, such as attributes describing discriminative object properties, requires time-consuming annotation by domain experts. We instead propose a method that relies on human gaze as auxiliary information, exploiting that even non-expert users have a natural ability to judge class membership. We present a data collection paradigm that involves a discrimination task to increase the information content obtained from gaze data. Our method extracts discriminative descriptors from the data and learns a compatibility function between image and gaze using three novel gaze embeddings: Gaze Histograms (GH), Gaze Features with Grid (GFG) and Gaze Features with Sequence (GFS). We introduce two new gaze-annotated datasets for fine-grained image classification and show that human gaze data is indeed class discriminative, provides a competitive alternative to expert-annotated attributes, and outperforms other baselines for zero-shot image classification. |
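Of the three gaze embeddings named above, the Gaze Histogram is conceptually the simplest: a spatial histogram of fixation locations over the image. A minimal sketch of that idea, with an assumed grid size and normalisation rather than the authors' exact feature definition:

```python
import numpy as np

def gaze_histogram(fixations, img_w, img_h, grid=(8, 8)):
    """Illustrative gaze-histogram embedding (grid size and normalisation assumed).

    fixations : (N, 2) fixation points as (x, y) pixel coordinates.
    Returns a flattened, L1-normalised grid of fixation counts.
    """
    hist, _, _ = np.histogram2d(
        fixations[:, 0], fixations[:, 1],
        bins=grid, range=[[0, img_w], [0, img_h]])
    hist = hist.flatten()
    return hist / max(hist.sum(), 1.0)

# toy usage: 50 random fixations on a 640x480 image
emb = gaze_histogram(np.random.rand(50, 2) * [640, 480], 640, 480)
```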
![]() | Michaela Klauck; Yusuke Sugano; Andreas Bulling Noticeable or Distractive? A Design Space for Gaze-Contingent User Interface Notifications Inproceedings Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1779-1786, 2017. @inproceedings{klauck17_chi, title = {Noticeable or Distractive? A Design Space for Gaze-Contingent User Interface Notifications}, author = {Michaela Klauck and Yusuke Sugano and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/02/klauck17_chi.pdf}, doi = {10.1145/3027063.3053085}, year = {2017}, date = {2017-02-13}, booktitle = {Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {1779-1786}, abstract = {Users are interrupted by an ever-increasing number of notifications, ranging from error messages, over new email or chat alerts, to advertisement pop-ups. We explore gaze-contingent user interface notifications that are shown depending on users' current gaze location. Specifically, we evaluate how different design properties influence notification noticeability and distractiveness. We measure noticeability quantitatively by analyzing participants' performance in confirming notifications and distractiveness using a questionnaire. Based on a 12-participant user study on a public display, we show that each of these properties affects noticeability and distractiveness differently and that the properties, in turn, allow for fine-grained optimization of notification display. These findings inform the design of future attentive user interfaces that could optimize the trade-off between, for example, the notification importance and the cost of interruption.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Users are interrupted by an ever-increasing number of notifications, ranging from error messages, over new email or chat alerts, to advertisement pop-ups. We explore gaze-contingent user interface notifications that are shown depending on users' current gaze location. Specifically, we evaluate how different design properties influence notification noticeability and distractiveness. We measure noticeability quantitatively by analyzing participants' performance in confirming notifications and distractiveness using a questionnaire. Based on a 12-participant user study on a public display, we show that each of these properties affects noticeability and distractiveness differently and that the properties, in turn, allow for fine-grained optimization of notification display. These findings inform the design of future attentive user interfaces that could optimize the trade-off between, for example, the notification importance and the cost of interruption. |
![]() | Mohamed Khamis; Ludwig Trotter; Markus Tessman; Christina Dannhart; Andreas Bulling; Florian Alt EyeVote in the Wild: Do Users bother Correcting System Errors on Public Displays? Inproceedings Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 57-62, 2016. @inproceedings{khamis16_mum, title = {EyeVote in the Wild: Do Users bother Correcting System Errors on Public Displays?}, author = {Mohamed Khamis and Ludwig Trotter and Markus Tessman and Christina Dannhart and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/11/khamis16_mum.pdf}, doi = {10.1145/3012709.3012743}, year = {2016}, date = {2016-10-10}, booktitle = {Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM)}, pages = {57-62}, abstract = {Although recovering from errors is straightforward on most interfaces, public display systems pose very unique design challenges. Namely, public display users interact for very short amounts of time and are believed to abandon the display when interrupted or forced to deviate from the main task. To date, it is not well understood whether public display designers should enable users to correct errors (e.g. by asking users to confirm or giving them a chance to correct their input), or aim for faster interaction and rely on other types of feedback to estimate errors. To close this gap, we conducted a field study where we investigated the users' willingness to correct their input on public displays. We report on our findings from an in-the-wild deployment of a public gaze-based voting system where we intentionally evoked system errors to see if users correct them. We found that public display users are willing to correct system errors provided that the correction is fast and straightforward. We discuss how our findings influence the choice of interaction methods for public displays; interaction methods that are highly usable but suffer from low accuracy can still be effective if users can "undo" their interactions.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Although recovering from errors is straightforward on most interfaces, public display systems pose very unique design challenges. Namely, public display users interact for very short amounts of time and are believed to abandon the display when interrupted or forced to deviate from the main task. To date, it is not well understood whether public display designers should enable users to correct errors (e.g. by asking users to confirm or giving them a chance to correct their input), or aim for faster interaction and rely on other types of feedback to estimate errors. To close this gap, we conducted a field study where we investigated the users' willingness to correct their input on public displays. We report on our findings from an in-the-wild deployment of a public gaze-based voting system where we intentionally evoked system errors to see if users correct them. We found that public display users are willing to correct system errors provided that the correction is fast and straightforward. We discuss how our findings influence the choice of interaction methods for public displays; interaction methods that are highly usable but suffer from low accuracy can still be effective if users can "undo" their interactions. |
![]() | Florian Alt; Mateusz Mikusz; Stefan Schneegass; Andreas Bulling Memorability of Cued-Recall Graphical Passwords with Saliency Masks Inproceedings Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 191-200, 2016. @inproceedings{alt16_mum, title = {Memorability of Cued-Recall Graphical Passwords with Saliency Masks}, author = {Florian Alt and Mateusz Mikusz and Stefan Schneegass and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/wp-content/blogs.dir/12/files/2016/11/alt16_mum.pdf}, doi = {10.1145/3012709.3012730}, year = {2016}, date = {2016-10-10}, booktitle = {Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM)}, pages = {191-200}, abstract = {Cued-recall graphical passwords have a lot of potential for secure user authentication, particularly if combined with saliency masks to prevent users from selecting weak passwords. Saliency masks exclude those areas of the image that are most likely to lead to hotspots and were shown to significantly improve password security. In this paper we investigate the impact of such saliency masks on the memorability of cued-recall graphical passwords. We first conduct two pre-studies with 52 participants to obtain a set of images with three different image complexities as well as real passwords. Based on a month-long user study with 26 participants we then show that cued-recall graphical passwords defined on a single image with a saliency mask are not more difficult to remember than those without saliency mask, and that the complexity of the password images does not have any influence on password memorability. These results complement prior work on the security of such passwords and underline the potential of saliency masks as both a secure and usable improvement to cued-recall gaze-based graphical passwords.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Cued-recall graphical passwords have a lot of potential for secure user authentication, particularly if combined with saliency masks to prevent users from selecting weak passwords. Saliency masks exclude those areas of the image that are most likely to lead to hotspots and were shown to significantly improve password security. In this paper we investigate the impact of such saliency masks on the memorability of cued-recall graphical passwords. We first conduct two pre-studies with 52 participants to obtain a set of images with three different image complexities as well as real passwords. Based on a month-long user study with 26 participants we then show that cued-recall graphical passwords defined on a single image with a saliency mask are not more difficult to remember than those without saliency mask, and that the complexity of the password images does not have any influence on password memorability. These results complement prior work on the security of such passwords and underline the potential of saliency masks as both a secure and usable improvement to cued-recall gaze-based graphical passwords. |
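A saliency mask in the sense used above excludes the most salient image regions, so users cannot place password points on likely hotspots. A minimal sketch of that filtering step, assuming a precomputed saliency map and an illustrative percentile threshold (both are my assumptions, not the paper's implementation):

```python
import numpy as np

def saliency_mask(saliency_map, keep_percentile=70):
    """Mask out the most salient regions of an image (illustrative only).

    saliency_map   : (H, W) array, higher = more salient.
    keep_percentile: pixels above this saliency percentile are excluded,
                     so password points can only be placed on the remainder.
    """
    threshold = np.percentile(saliency_map, keep_percentile)
    return saliency_map <= threshold   # True where password clicks are allowed

def is_valid_click(mask, x, y):
    """Reject password points that fall on masked (overly salient) pixels."""
    return bool(mask[int(y), int(x)])
```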
![]() | Murtaza Dhuliawala; Juyoung Lee; Junichi Shimizu; Andreas Bulling; Kai Kunze; Thad Starner; Woontack Woo Smooth Eye Movement Interaction Using EOG Glasses Inproceedings Proc. of the International Conference on Multimodal Interaction (ICMI), pp. 307-311 , 2016. @inproceedings{Dhuliawala16_ICMI, title = {Smooth Eye Movement Interaction Using EOG Glasses}, author = {Murtaza Dhuliawala and Juyoung Lee and Junichi Shimizu and Andreas Bulling and Kai Kunze and Thad Starner and Woontack Woo}, url = {https://perceptual.mpi-inf.mpg.de/wp-content/blogs.dir/12/files/2016/09/dhuliawala16_icmi.pdf}, doi = {10.1145/2993148.2993181}, year = {2016}, date = {2016-08-25}, booktitle = {Proc. of the International Conference on Multimodal Interaction (ICMI)}, pages = {307-311 }, abstract = {Orbits combines a visual display and an eye motion sensor to allow a user to select between options by tracking a cursor with the eyes as the cursor travels in a circular path around each option. Using an off-the-shelf J!NS MEME pair of eyeglasses, we present a pilot study that suggests that the eye movement required for Orbits can be sensed using three electrodes: one in the nose bridge and one in each nose pad. For forced choice binary selection, we achieve a 2.6 bits per second (bps) input rate at 250ms per input. We also introduce Head Orbits, where the user fixates the eyes on a target and moves the head in synchrony with the orbiting target. Measuring only the relative movement of the eyes in relation to the head, this method achieves a maximum rate of 2.0 bps at 500ms per input. Finally, we combine the two techniques together with a gyro to create an interface with a maximum input rate of 5.0 bps.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Orbits combines a visual display and an eye motion sensor to allow a user to select between options by tracking a cursor with the eyes as the cursor travels in a circular path around each option. Using an off-the-shelf J!NS MEME pair of eyeglasses, we present a pilot study that suggests that the eye movement required for Orbits can be sensed using three electrodes: one in the nose bridge and one in each nose pad. For forced choice binary selection, we achieve a 2.6 bits per second (bps) input rate at 250ms per input. We also introduce Head Orbits, where the user fixates the eyes on a target and moves the head in synchrony with the orbiting target. Measuring only the relative movement of the eyes in relation to the head, this method achieves a maximum rate of 2.0 bps at 500ms per input. Finally, we combine the two techniques together with a gyro to create an interface with a maximum input rate of 5.0 bps. |
![]() | Mohamed Khamis; Florian Alt; Andreas Bulling Challenges and Design Space of Gaze-enabled Public Displays Inproceedings Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2016), pp. 1736-1745, 2016. @inproceedings{khamis16_petmei, title = {Challenges and Design Space of Gaze-enabled Public Displays}, author = {Mohamed Khamis and Florian Alt and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/07/Khamis2016_UbiComp.pdf}, doi = {10.1145/2968219.2968342}, year = {2016}, date = {2016-07-25}, booktitle = {Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2016)}, pages = {1736-1745}, abstract = {Gaze is an attractive modality for public displays, hence the recent years saw an increase in deployments of gaze-enabled public displays. Although gaze has been thoroughly investigated for desktop scenarios, gaze-enabled public displays present new challenges that are unique to this setup. In contrast to desktop settings, public displays (1) cannot afford requiring eye tracker calibration, (2) expect users to interact from different positions, and (3) expect multiple users to interact simultaneously. In this work we discuss these challenges, and explore the design space of gaze-enabled public displays. We conclude by discussing how the current state of research stands wrt. the identified challenges, and highlight directions for future work.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze is an attractive modality for public displays, hence the recent years saw an increase in deployments of gaze-enabled public displays. Although gaze has been thoroughly investigated for desktop scenarios, gaze-enabled public displays present new challenges that are unique to this setup. In contrast to desktop settings, public displays (1) cannot afford requiring eye tracker calibration, (2) expect users to interact from different positions, and (3) expect multiple users to interact simultaneously. In this work we discuss these challenges, and explore the design space of gaze-enabled public displays. We conclude by discussing how the current state of research stands wrt. the identified challenges, and highlight directions for future work. |
![]() | Erroll Wood; Tadas Baltrusaitis; Louis-Philippe Morency; Peter Robinson; Andreas Bulling A 3D Morphable Eye Region Model for Gaze Estimation Inproceedings Proc. of the European Conference on Computer Vision (ECCV), pp. 297-313, 2016. @inproceedings{wood16_eccv, title = {A 3D Morphable Eye Region Model for Gaze Estimation}, author = {Erroll Wood and Tadas Baltrusaitis and Louis-Philippe Morency and Peter Robinson and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2017/02/wood16_eccv.pdf https://www.youtube.com/watch?v=n_htSvUq7iU}, doi = {10.1007/978-3-319-46448-0_18}, year = {2016}, date = {2016-07-11}, booktitle = {Proc. of the European Conference on Computer Vision (ECCV)}, pages = {297-313}, abstract = {Morphable face models are a powerful tool, but have previously failed to model the eye accurately due to complexities in its material and motion. We present a new multi-part model of the eye that includes a morphable model of the facial eye region, as well as an anatomy-based eyeball model. It is the first morphable model that accurately captures eye region shape, since it was built from high-quality head scans. It is also the first to allow independent eyeball movement, since we treat it as a separate part. To showcase our model we present a new method for illumination- and head-pose–invariant gaze estimation from a single RGB image. We fit our model to an image through analysis-by-synthesis, solving for eye region shape, texture, eyeball pose, and illumination simultaneously. The fitted eyeball pose parameters are then used to estimate gaze direction. Through evaluation on two standard datasets we show that our method generalizes to both webcam and high-quality camera images, and outperforms a state-of-the-art CNN method achieving a gaze estimation accuracy of 9.44° in a challenging user-independent scenario.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Morphable face models are a powerful tool, but have previously failed to model the eye accurately due to complexities in its material and motion. We present a new multi-part model of the eye that includes a morphable model of the facial eye region, as well as an anatomy-based eyeball model. It is the first morphable model that accurately captures eye region shape, since it was built from high-quality head scans. It is also the first to allow independent eyeball movement, since we treat it as a separate part. To showcase our model we present a new method for illumination- and head-pose–invariant gaze estimation from a single RGB image. We fit our model to an image through analysis-by-synthesis, solving for eye region shape, texture, eyeball pose, and illumination simultaneously. The fitted eyeball pose parameters are then used to estimate gaze direction. Through evaluation on two standard datasets we show that our method generalizes to both webcam and high-quality camera images, and outperforms a state-of-the-art CNN method achieving a gaze estimation accuracy of 9.44° in a challenging user-independent scenario. |
![]() | Yusuke Sugano; Xucong Zhang; Andreas Bulling AggreGaze: Collective Estimation of Audience Attention on Public Displays Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 821-831, 2016, (best paper honourable mention award). @inproceedings{sugano16_uist, title = {AggreGaze: Collective Estimation of Audience Attention on Public Displays}, author = {Yusuke Sugano and Xucong Zhang and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/09/sugano16_uist.pdf https://www.youtube.com/watch?v=eFK39S_lgdg http://s2017.siggraph.org/acm-siggraph-organization-events/sessions/uist-reprise-siggraph-2017}, doi = {10.1145/2984511.2984536}, year = {2016}, date = {2016-06-26}, booktitle = {Proc. of the ACM Symposium on User Interface Software and Technology (UIST)}, pages = {821-831}, abstract = {Gaze is frequently explored in public display research given its importance for monitoring and analysing audience attention. However, current gaze-enabled public display interfaces require either special-purpose eye tracking equipment or explicit personal calibration for each individual user. We present AggreGaze, a novel method for estimating spatio-temporal audience attention on public displays. Our method requires only a single off-the-shelf camera attached to the display, does not require any personal calibration, and provides visual attention estimates across the full display. We achieve this by 1) compensating for errors of state-of-the-art appearance-based gaze estimation methods through on-site training data collection, and by 2) aggregating uncalibrated and thus inaccurate gaze estimates of multiple users into joint attention estimates. We propose different visual stimuli for this compensation: a standard 9-point calibration, moving targets, text and visual stimuli embedded into the display content, as well as normal video content. Based on a two-week deployment in a public space, we demonstrate the effectiveness of our method for estimating attention maps that closely resemble ground-truth audience gaze distributions.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Gaze is frequently explored in public display research given its importance for monitoring and analysing audience attention. However, current gaze-enabled public display interfaces require either special-purpose eye tracking equipment or explicit personal calibration for each individual user. We present AggreGaze, a novel method for estimating spatio-temporal audience attention on public displays. Our method requires only a single off-the-shelf camera attached to the display, does not require any personal calibration, and provides visual attention estimates across the full display. We achieve this by 1) compensating for errors of state-of-the-art appearance-based gaze estimation methods through on-site training data collection, and by 2) aggregating uncalibrated and thus inaccurate gaze estimates of multiple users into joint attention estimates. We propose different visual stimuli for this compensation: a standard 9-point calibration, moving targets, text and visual stimuli embedded into the display content, as well as normal video content. Based on a two-week deployment in a public space, we demonstrate the effectiveness of our method for estimating attention maps that closely resemble ground-truth audience gaze distributions. |
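The aggregation step at the heart of AggreGaze, pooling many noisy, uncalibrated per-user gaze estimates into a joint attention map, can be illustrated as accumulating estimates on a display-sized grid and smoothing. The grid resolution and smoothing width below are illustrative assumptions, not values from the paper:

```python
import numpy as np
from scipy.ndimage import gaussian_filter

def attention_map(gaze_points, display_w, display_h, cell=20, sigma=2.0):
    """Aggregate noisy on-screen gaze estimates into an attention heatmap.

    gaze_points : (N, 2) estimated gaze positions in display pixels (x, y),
                  pooled over many users and frames.
    cell, sigma : grid resolution and smoothing, chosen here for illustration.
    """
    w, h = display_w // cell, display_h // cell
    heat = np.zeros((h, w))
    for x, y in gaze_points:
        cx, cy = int(x) // cell, int(y) // cell
        if 0 <= cx < w and 0 <= cy < h:
            heat[cy, cx] += 1
    heat = gaussian_filter(heat, sigma=sigma)   # smooth out per-sample noise
    return heat / heat.max() if heat.max() > 0 else heat
```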
![]() | Mohamed Khamis; Ozan Saltuk; Alina Hang; Katharina Stolz; Andreas Bulling; Florian Alt TextPursuits: Using Text for Pursuits-Based Interaction and Calibration on Public Displays Inproceedings Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp), pp. 274-285, 2016. @inproceedings{Khamis16_ubicomp, title = {TextPursuits: Using Text for Pursuits-Based Interaction and Calibration on Public Displays}, author = {Mohamed Khamis and Ozan Saltuk and Alina Hang and Katharina Stolz and Andreas Bulling and Florian Alt}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/08/khamis16_ubicomp.pdf}, doi = {10.1145/2971648.2971679}, year = {2016}, date = {2016-06-08}, booktitle = {Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp)}, journal = {Proc. ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp)}, pages = {274-285}, abstract = {Pursuits, a technique that correlates users’ eye movements with moving on-screen targets, was recently introduced for calibration-free interaction with public displays. While prior work used abstract objects or dots as targets, we explore the use of Pursuits with text (read-and-pursue). Given that much of the content on public displays includes text, designers could greatly benefit from users being able to spontaneously interact and implicitly calibrate an eye tracker while simply reading text on a display. At the same time, using Pursuits with textual content is challenging given that the eye movements performed while reading interfere with the pursuit movements. We present two systems, EyeVote and Read2Calibrate, that enable spontaneous gaze interaction and implicit calibration by reading text. Results from two user studies (N=37) show that Pursuits with text is feasible and can achieve similar accuracy as non text-based pursuit approaches. While calibration is less accurate, it integrates smoothly with reading and allows to identify areas of the display the user is looking at.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Pursuits, a technique that correlates users’ eye movements with moving on-screen targets, was recently introduced for calibration-free interaction with public displays. While prior work used abstract objects or dots as targets, we explore the use of Pursuits with text (read-and-pursue). Given that much of the content on public displays includes text, designers could greatly benefit from users being able to spontaneously interact and implicitly calibrate an eye tracker while simply reading text on a display. At the same time, using Pursuits with textual content is challenging given that the eye movements performed while reading interfere with the pursuit movements. We present two systems, EyeVote and Read2Calibrate, that enable spontaneous gaze interaction and implicit calibration by reading text. Results from two user studies (N=37) show that Pursuits with text is feasible and can achieve similar accuracy as non text-based pursuit approaches. While calibration is less accurate, it integrates smoothly with reading and allows to identify areas of the display the user is looking at. |
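Pursuits, as summarised above, selects the on-screen target whose motion correlates best with the user's eye movements. A minimal sketch of that correlation step follows, assuming gaze and target positions are sampled at the same rate over a sliding window; the function names and the 0.8 threshold are illustrative and do not reproduce the paper's handling of reading-induced eye movements.

```python
# A minimal sketch of Pursuits-style target selection under the stated assumptions.
import numpy as np

def pearson(a, b):
    """Pearson correlation between two 1-D position traces."""
    a, b = np.asarray(a, float), np.asarray(b, float)
    if a.std() == 0 or b.std() == 0:
        return 0.0
    return float(np.corrcoef(a, b)[0, 1])

def select_target(gaze_xy, targets_xy, threshold=0.8):
    """Return the index of the moving target whose trajectory correlates best
    with the gaze trajectory, or None if no correlation exceeds the threshold.

    gaze_xy:    (N, 2) array of gaze samples in the window
    targets_xy: list of (N, 2) arrays, one trajectory per on-screen target
    """
    gaze_xy = np.asarray(gaze_xy, float)
    best_idx, best_corr = None, threshold
    for i, t in enumerate(targets_xy):
        t = np.asarray(t, float)
        # Correlate x and y components separately and take the minimum,
        # so a target must match the gaze trajectory in both dimensions.
        corr = min(pearson(gaze_xy[:, 0], t[:, 0]),
                   pearson(gaze_xy[:, 1], t[:, 1]))
        if corr > best_corr:
            best_idx, best_corr = i, corr
    return best_idx
```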
![]() | Sreyasi Nag Chowdhury; Mateusz Malinowski; Andreas Bulling; Mario Fritz Xplore-M-Ego: Contextual Media Retrieval Using Natural Language Queries Inproceedings Proc. of the ACM International Conference on Multimedia Retrieval (ICMR), pp. 243-247, 2016, ISBN: 978-1-4503-4359-6. @inproceedings{chowdhury16_icmr, title = {Xplore-M-Ego: Contextual Media Retrieval Using Natural Language Queries}, author = {Sreyasi Nag Chowdhury and Mateusz Malinowski and Andreas Bulling and Mario Fritz}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/05/chowdhury16_icmr.pdf}, doi = {10.1145/2911996.2912044}, isbn = {978-1-4503-4359-6}, year = {2016}, date = {2016-06-06}, booktitle = {Proc. of the ACM International Conference on Multimedia Retrieval (ICMR)}, pages = {243-247}, abstract = {The widespread integration of cameras in hand-held and head-worn devices and the ability to share content online enables a large and diverse visual capture of the world that millions of users build up collectively every day. We envision these images as well as associated meta information, such as GPS coordinates and timestamps, to form a collective visual memory that can be queried while automatically taking the ever-changing context of mobile users into account. As a first step towards this vision, in this work we present Xplore-M-Ego: a novel media retrieval system that allows users to query a dynamic database of images using spatio-temporal natural language queries. We evaluate our system using a new dataset of real image queries as well as through a usability study. One key finding is that there is a considerable amount of inter-user variability in the resolution of spatial relations in natural language utterances. We show that our system can cope with this variability using personalisation through an online learning-based retrieval formulation.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The widespread integration of cameras in hand-held and head-worn devices and the ability to share content online enables a large and diverse visual capture of the world that millions of users build up collectively every day. We envision these images as well as associated meta information, such as GPS coordinates and timestamps, to form a collective visual memory that can be queried while automatically taking the ever-changing context of mobile users into account. As a first step towards this vision, in this work we present Xplore-M-Ego: a novel media retrieval system that allows users to query a dynamic database of images using spatio-temporal natural language queries. We evaluate our system using a new dataset of real image queries as well as through a usability study. One key finding is that there is a considerable amount of inter-user variability in the resolution of spatial relations in natural language utterances. We show that our system can cope with this variability using personalisation through an online learning-based retrieval formulation. |
![]() | Florian Alt; Andreas Bulling; Lukas Mecke; Daniel Buschek Attention, please! Comparing Features for Measuring Audience Attention Towards Pervasive Displays Inproceedings Proc. of the ACM SIGCHI Conference on Designing Interactive Systems (DIS), pp. 823-828, 2016, ISBN: 978-1-4503-4031-1. @inproceedings{alt16_dis, title = {Attention, please! Comparing Features for Measuring Audience Attention Towards Pervasive Displays}, author = {Florian Alt and Andreas Bulling and Lukas Mecke and Daniel Buschek}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/04/alt16_dis.pdf}, doi = {10.1145/2901790.2901897}, isbn = {978-1-4503-4031-1}, year = {2016}, date = {2016-06-04}, booktitle = {Proc. of the ACM SIGCHI Conference on Designing Interactive Systems (DIS)}, pages = {823-828}, abstract = {Measuring audience attention towards pervasive displays is important but accurate measurement in real time remains a significant sensing challenge. Consequently, researchers and practitioners typically use other features, such as face presence, as a proxy. We provide a principled comparison of the performance of six features and their combinations for measuring attention: face presence, movement trajectory, walking speed, shoulder orientation, head pose, and gaze direction. We implemented a prototype that is capable of capturing this rich set of features from video and depth camera data. Using a controlled lab experiment (N=18) we show that as a single feature, face presence is indeed among the most accurate. We further show that accuracy can be increased through a combination of features (+10.3%), knowledge about the audience (+63.8%), as well as user identities (+69.0%). Our findings are valuable for display providers who want to collect data on display effectiveness or build interactive, responsive apps.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Measuring audience attention towards pervasive displays is important but accurate measurement in real time remains a significant sensing challenge. Consequently, researchers and practitioners typically use other features, such as face presence, as a proxy. We provide a principled comparison of the performance of six features and their combinations for measuring attention: face presence, movement trajectory, walking speed, shoulder orientation, head pose, and gaze direction. We implemented a prototype that is capable of capturing this rich set of features from video and depth camera data. Using a controlled lab experiment (N=18) we show that as a single feature, face presence is indeed among the most accurate. We further show that accuracy can be increased through a combination of features (+10.3%), knowledge about the audience (+63.8%), as well as user identities (+69.0%). Our findings are valuable for display providers who want to collect data on display effectiveness or build interactive, responsive apps. |
![]() | Adalberto L. Simeone; Andreas Bulling; Jason Alexander; Hans Gellersen Three-Point Interaction: Combining Bi-manual Direct Touch with Gaze Inproceedings Proc. of the International Working Conference on Advanced Visual Interfaces (AVI), pp. 168-175, 2016. @inproceedings{simeone16_avi, title = {Three-Point Interaction: Combining Bi-manual Direct Touch with Gaze}, author = {Adalberto L. Simeone and Andreas Bulling and Jason Alexander and Hans Gellersen}, url = {https://perceptual.mpi-inf.mpg.de/wp-content/blogs.dir/12/files/2016/05/simeone16_avi.pdf}, doi = {10.1145/2909132.2909251}, year = {2016}, date = {2016-05-12}, booktitle = {Proc. of the International Working Conference on Advanced Visual Interfaces (AVI)}, pages = {168-175}, abstract = {The benefits of two-point interaction for tasks that require users to simultaneously manipulate multiple entities or dimensions are widely known. Two-point interaction has become common, e.g., when zooming or pinching using two fingers on a smartphone. We propose a novel interaction technique that implements three-point interaction by augmenting two-finger direct touch with gaze as a third input channel. We evaluate two key characteristics of our technique in two multi-participant user studies. In the first, we used the technique for object selection. In the second, we evaluate it in a 3D matching task that requires simultaneous continuous input from fingers and the eyes. Our results show that in both cases participants learned to interact with three input channels without cognitive or mental overload. Participants' performance tended towards fast selection times in the first study and exhibited parallel interaction in the second. These results are promising and show that there is scope for additional input channels beyond two-point interaction.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The benefits of two-point interaction for tasks that require users to simultaneously manipulate multiple entities or dimensions are widely known. Two-point interaction has become common, e.g., when zooming or pinching using two fingers on a smartphone. We propose a novel interaction technique that implements three-point interaction by augmenting two-finger direct touch with gaze as a third input channel. We evaluate two key characteristics of our technique in two multi-participant user studies. In the first, we used the technique for object selection. In the second, we evaluate it in a 3D matching task that requires simultaneous continuous input from fingers and the eyes. Our results show that in both cases participants learned to interact with three input channels without cognitive or mental overload. Participants' performance tended towards fast selection times in the first study and exhibited parallel interaction in the second. These results are promising and show that there is scope for additional input channels beyond two-point interaction. |
![]() | Stefan Schneegass; Youssef Oualil; Andreas Bulling SkullConduct: Biometric User Identification on Eyewear Computers Using Bone Conduction Through the Skull Inproceedings Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1379-1384, 2016, ISBN: 978-1-4503-3362-7. @inproceedings{schneegass16_chi, title = {SkullConduct: Biometric User Identification on Eyewear Computers Using Bone Conduction Through the Skull}, author = {Stefan Schneegass and Youssef Oualil and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/01/schneegass16_chi.pdf https://www.youtube.com/watch?v=A4BCnsQmo6c https://www.newscientist.com/article/2085430-the-buzz-of-your-skull-can-be-used-to-tell-exactly-who-you-are/ http://www.golem.de/news/skullconduct-der-schaedel-meldet-den-nutzer-an-der-datenbrille-an-1605-120892.html http://gizmodo.com/youll-never-forget-your-password-when-its-the-sound-you-1772327137 https://www.washingtonpost.com/news/the-switch/wp/2016/04/22/could-skull-echos-and-brainprints-replace-the-password/ http://www.computerwelt.at/news/technologie-strategie/security/detail/artikel/115454-entsperrung-durch-schaedelknochen-loest-passwoerter-ab/ http://www.uni-saarland.de/nc/aktuelles/artikel/nr/14597.html}, doi = {10.1145/2858036.2858152}, isbn = {978-1-4503-3362-7}, year = {2016}, date = {2016-05-07}, booktitle = {Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {1379-1384}, abstract = {Secure user identification is important for the increasing number of eyewear computers but limited input capabilities pose significant usability challenges for established knowledge-based schemes, such as passwords or PINs. We present SkullConduct, a biometric system that uses bone conduction of sound through the user's skull as well as a microphone readily integrated into many of these devices, such as Google Glass. At the core of SkullConduct is a method to analyze the characteristic frequency response created by the user's skull using a combination of Mel Frequency Cepstral Coefficient (MFCC) features as well as a computationally light-weight 1NN classifier. We report on a controlled experiment with 10 participants that shows that this frequency response is person-specific and stable - even when taking off and putting on the device multiple times - and thus serves as a robust biometric. We show that our method can identify users with 97.0% accuracy and authenticate them with an equal error rate of 6.9%, thereby bringing biometric user identification to eyewear computers equipped with bone conduction technology.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Secure user identification is important for the increasing number of eyewear computers but limited input capabilities pose significant usability challenges for established knowledge-based schemes, such as passwords or PINs. We present SkullConduct, a biometric system that uses bone conduction of sound through the user's skull as well as a microphone readily integrated into many of these devices, such as Google Glass. At the core of SkullConduct is a method to analyze the characteristic frequency response created by the user's skull using a combination of Mel Frequency Cepstral Coefficient (MFCC) features as well as a computationally light-weight 1NN classifier. We report on a controlled experiment with 10 participants that shows that this frequency response is person-specific and stable - even when taking off and putting on the device multiple times - and thus serves as a robust biometric. We show that our method can identify users with 97.0% accuracy and authenticate them with an equal error rate of 6.9%, thereby bringing biometric user identification to eyewear computers equipped with bone conduction technology. |
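The abstract above identifies users from the skull's characteristic frequency response using MFCC features and a 1NN classifier. The sketch below illustrates that feature-plus-nearest-neighbour idea in the simplest possible form, assuming one enrolment recording per user and using librosa for MFCC extraction; averaging MFCCs over time and the Euclidean distance are simplifying assumptions, not the paper's exact pipeline.

```python
# A minimal MFCC + 1-nearest-neighbour identification sketch under the stated
# assumptions; names and parameters are illustrative, not the paper's code.
import numpy as np
import librosa

def mfcc_fingerprint(signal, sr, n_mfcc=13):
    """Average MFCCs over time into a single feature vector."""
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    return mfcc.mean(axis=1)

def identify(probe_signal, sr, enrolled):
    """enrolled: dict mapping user id -> enrolment signal (same sample rate).
    Returns the id of the nearest enrolled user (1NN on MFCC vectors)."""
    probe = mfcc_fingerprint(probe_signal, sr)
    best_user, best_dist = None, np.inf
    for user, sig in enrolled.items():
        dist = np.linalg.norm(probe - mfcc_fingerprint(sig, sr))
        if dist < best_dist:
            best_user, best_dist = user, dist
    return best_user
```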
![]() | Pingmei Xu; Yusuke Sugano; Andreas Bulling Spatio-Temporal Modeling and Prediction of Visual Attention in Graphical User Interfaces Inproceedings Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 3299-3310, 2016, ISBN: 978-1-4503-3362-7 , (best paper honourable mention award). @inproceedings{xu16_chi, title = {Spatio-Temporal Modeling and Prediction of Visual Attention in Graphical User Interfaces}, author = {Pingmei Xu and Yusuke Sugano and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/02/xu16_chi.pdf}, doi = {10.1145/2858036.2858479}, isbn = {978-1-4503-3362-7 }, year = {2016}, date = {2016-05-07}, booktitle = {Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {3299-3310}, abstract = {We present a computational model to predict users' spatio-temporal visual attention for WIMP-style (windows, icons, mouse, pointer) graphical user interfaces. Like existing models of bottom-up visual attention in computer vision, our model does not require any eye tracking equipment. Instead, it predicts attention solely using information available to the interface, specifically users' mouse and keyboard input as well as the UI components they interact with. To study our model in a principled way we further introduce a method to synthesize user interface layouts that are functionally equivalent to real-world interfaces, such as from Gmail, Facebook, or GitHub. We first quantitatively analyze attention allocation and its correlation with user input and UI components using ground-truth gaze, mouse, and keyboard data of 18 participants performing a text editing task. We then show that our model predicts attention maps more accurately than state-of-the-art methods. Our results underline the significant potential of spatio-temporal attention modeling for user interface evaluation, optimization, or even simulation.}, note = {best paper honourable mention award}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We present a computational model to predict users' spatio-temporal visual attention for WIMP-style (windows, icons, mouse, pointer) graphical user interfaces. Like existing models of bottom-up visual attention in computer vision, our model does not require any eye tracking equipment. Instead, it predicts attention solely using information available to the interface, specifically users' mouse and keyboard input as well as the UI components they interact with. To study our model in a principled way we further introduce a method to synthesize user interface layouts that are functionally equivalent to real-world interfaces, such as from Gmail, Facebook, or GitHub. We first quantitatively analyze attention allocation and its correlation with user input and UI components using ground-truth gaze, mouse, and keyboard data of 18 participants performing a text editing task. We then show that our model predicts attention maps more accurately than state-of-the-art methods. Our results underline the significant potential of spatio-temporal attention modeling for user interface evaluation, optimization, or even simulation. |
![]() | Mohamed Khamis; Florian Alt; Mariam Hassib; Emanuel von Zezschwitz; Regina Hasholzner; Andreas Bulling GazeTouchPass: Multimodal Authentication Using Gaze and Touch on Mobile Devices Inproceedings Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 2156-2164, 2016, ISBN: 978-1-4503-4082-3. @inproceedings{khamis16_chi, title = {GazeTouchPass: Multimodal Authentication Using Gaze and Touch on Mobile Devices}, author = {Mohamed Khamis and Florian Alt and Mariam Hassib and Emanuel von Zezschwitz and Regina Hasholzner and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/02/khamis16_chi.pdf}, doi = {10.1145/2851581.2892314}, isbn = {978-1-4503-4082-3}, year = {2016}, date = {2016-05-07}, booktitle = {Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {2156-2164}, abstract = {We propose a multimodal scheme, GazeTouchPass, that combines gaze and touch for shoulder-surfing resistant user authentication on mobile devices. GazeTouchPass allows passwords with multiple switches between input modalities during authentication. This requires attackers to simultaneously observe the device screen and the user's eyes to find the password. We evaluate the security and usability of GazeTouchPass in two user studies. Our findings show that GazeTouchPass is usable and significantly more secure than single-modal authentication against basic and even advanced shoulder-surfing attacks.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } We propose a multimodal scheme, GazeTouchPass, that combines gaze and touch for shoulder-surfing resistant user authentication on mobile devices. GazeTouchPass allows passwords with multiple switches between input modalities during authentication. This requires attackers to simultaneously observe the device screen and the user's eyes to find the password. We evaluate the security and usability of GazeTouchPass in two user studies. Our findings show that GazeTouchPass is usable and significantly more secure than single-modal authentication against basic and even advanced shoulder-surfing attacks. |
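GazeTouchPass, as described above, builds passwords from an ordered mix of touch and gaze tokens so that an attacker must observe both the screen and the user's eyes. The sketch below models such a password merely as a modality-tagged token sequence with exact-match verification; the token alphabet shown is a made-up example, not the scheme's actual vocabulary.

```python
# A minimal sketch of a two-modality password as a token sequence; purely
# illustrative, not the GazeTouchPass implementation.
from dataclasses import dataclass

@dataclass(frozen=True)
class Token:
    modality: str  # "touch" or "gaze"
    value: str     # e.g. "3" for a touch digit, "left"/"right" for a gaze gesture

def verify(entered, stored):
    """Exact ordered match of modality/value pairs. Modality switches inside
    `stored` are what force an attacker to watch screen and eyes at once."""
    return len(entered) == len(stored) and all(
        e.modality == s.modality and e.value == s.value
        for e, s in zip(entered, stored)
    )

# Example password with two modality switches: touch 3, gaze left, touch 7, gaze right.
stored = [Token("touch", "3"), Token("gaze", "left"),
          Token("touch", "7"), Token("gaze", "right")]
```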
![]() | Dominik Kirst; Andreas Bulling On the Verge: Voluntary Convergences for Accurate and Precise Timing of Gaze Input Inproceedings Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1519-1525, 2016, ISBN: 978-1-4503-4082-3. @inproceedings{kirst16_chi, title = {On the Verge: Voluntary Convergences for Accurate and Precise Timing of Gaze Input}, author = { Dominik Kirst and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/02/kirst16_chi.pdf}, doi = {10.1145/2851581.2892307}, isbn = {978-1-4503-4082-3}, year = {2016}, date = {2016-05-07}, booktitle = {Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI)}, pages = {1519-1525}, abstract = {The problem of triggering input accurately (with a small temporal offset) and precisely (with high repeatability) at a specific point in time has so far been largely ignored in gaze interaction research. We explore voluntary eye convergences as a novel interaction technique for precise and accurate timing of gaze input and a solution to the "Midas touch" problem, i.e. the accidental triggering of input when looking at an interface. We introduce a novel clock paradigm to study input timing and demonstrate that voluntary convergences are significantly more accurate and precise than common gaze dwelling. Our findings suggest that voluntary convergences are well-suited for applications in which timing of user input is important, thereby complementing existing gaze techniques that focus on speed and spatial precision.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The problem of triggering input accurately (with a small temporal offset) and precisely (with high repeatability) at a specific point in time has so far been largely ignored in gaze interaction research. We explore voluntary eye convergences as a novel interaction technique for precise and accurate timing of gaze input and a solution to the "Midas touch" problem, i.e. the accidental triggering of input when looking at an interface. We introduce a novel clock paradigm to study input timing and demonstrate that voluntary convergences are significantly more accurate and precise than common gaze dwelling. Our findings suggest that voluntary convergences are well-suited for applications in which timing of user input is important, thereby complementing existing gaze techniques that focus on speed and spatial precision. |
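The technique above uses a voluntary eye convergence as a deliberate trigger that is unlikely to occur when simply looking at an interface. The sketch below shows one plausible way to detect such an event, assuming a binocular tracker that reports separate on-screen x coordinates for the left and right eye; the disparity-drop threshold and sample count are illustrative, not values from the paper.

```python
# A minimal convergence-trigger sketch under the stated assumptions.
def convergence_trigger(left_x, right_x, baseline_disparity,
                        drop_px=40, min_samples=5):
    """left_x, right_x: equal-length sequences of per-eye horizontal gaze samples.
    Fires when the horizontal disparity between the eyes stays at least
    `drop_px` below the user's relaxed baseline for `min_samples` in a row."""
    run = 0
    for lx, rx in zip(left_x, right_x):
        disparity = rx - lx  # shrinks (or goes negative) as the eyes converge
        if disparity < baseline_disparity - drop_px:
            run += 1
            if run >= min_samples:
                return True
        else:
            run = 0
    return False
```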
![]() | Daniel Pohl; Xucong Zhang; Andreas Bulling Combining Eye Tracking with Optimizations for Lens Astigmatism in Modern Wide-Angle HMDs Inproceedings Proc. of the IEEE Conference on Virtual Reality (VR), pp. 269-270, 2016. @inproceedings{pohl16_vr, title = {Combining Eye Tracking with Optimizations for Lens Astigmatism in Modern Wide-Angle HMDs}, author = {Daniel Pohl and Xucong Zhang and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2016/01/Pohl16_VR.pdf}, doi = {10.1109/VR.2016.7504757}, year = {2016}, date = {2016-03-19}, booktitle = {Proc. of the IEEE Conference on Virtual Reality (VR)}, pages = {269-270}, abstract = {Virtual Reality has hit the consumer market with affordable head-mounted displays. When using these, it quickly becomes apparent that the resolution of the built-in display panels still needs to be highly increased. To overcome the resulting higher performance demands, eye tracking can be used for foveated rendering. However, as there are lens distortions in HMDs, there are more possibilities to increase the performance with smarter rendering approaches. We present a new system using optimizations for rendering considering lens astigmatism and combining this with foveated rendering through eye tracking. Depending on the current eye gaze, this delivers a rendering speed-up of up to 20%.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Virtual Reality has hit the consumer market with affordable head-mounted displays. When using these, it quickly becomes apparent that the resolution of the built-in display panels still needs to be highly increased. To overcome the resulting higher performance demands, eye tracking can be used for foveated rendering. However, as there are lens distortions in HMDs, there are more possibilities to increase the performance with smarter rendering approaches. We present a new system using optimizations for rendering considering lens astigmatism and combining this with foveated rendering through eye tracking. Depending on the current eye gaze, this delivers a rendering speed-up of up to 20%. |
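The system above uses eye tracking for foveated rendering, i.e. spending full rendering quality only where the user is looking. As a rough illustration of the gaze-contingent part only, the sketch below picks a per-tile quality level from the tile's pixel distance to the gaze point; the radii and quality levels are arbitrary placeholders, and the paper's lens-astigmatism optimisation is not modelled.

```python
# A minimal gaze-contingent level-of-detail sketch; thresholds are illustrative.
def tile_quality(tile_center, gaze_point, full_radius=200, mid_radius=500):
    """Return a quality level for one screen tile based on its pixel distance
    from the current gaze point."""
    dx = tile_center[0] - gaze_point[0]
    dy = tile_center[1] - gaze_point[1]
    dist = (dx * dx + dy * dy) ** 0.5
    if dist <= full_radius:
        return "full"      # foveal region: native resolution
    if dist <= mid_radius:
        return "half"      # parafoveal: reduced shading rate
    return "quarter"       # periphery: lowest rendering cost
```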
Conference
![]() | Can Privacy-Aware Lifelogs Alter Our Memories? Inproceedings Forthcoming Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), Forthcoming. |
![]() | PrivacEye: Privacy-Preserving Head-Mounted Eye Tracking Using Egocentric Scene Image and Eye Movement Features Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019, (best video award). |
![]() | Privacy-Aware Eye Tracking Using Differential Privacy Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019, (best paper award). |
![]() | Reducing Calibration Drift in Mobile Eye Trackers by Exploiting Mobile Phone Usage Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), 2019. |
A fast approach to refraction-aware 3D eye-model fitting and gaze prediction Inproceedings Forthcoming Proc. International Symposium on Eye Tracking Research and Applications (ETRA), Forthcoming. | |
![]() | Evaluation of Appearance-Based Methods and Implications for Gaze-Based Applications Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019. |
![]() | A Design Space for Gaze Interaction on Head-mounted Displays Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), 2019. |
![]() | Towards a Symbiotic Human-Machine Depth Sensor: Exploring 3D Gaze for Object Reconstruction Inproceedings Adj. Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 114-116, 2018. |
![]() | GazeDrone: Mobile Eye-Based Interaction in Public Space Without Augmenting the User Inproceedings Proc. of the ACM Workshop on Micro Aerial Vehicle Networks, Systems, and Applications (DroNet), pp. 66-71, 2018. |
![]() | Forecasting User Attention During Everyday Mobile Interactions Using Device-Integrated and Wearable Sensors Inproceedings Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI), pp. 1:1–1:13, 2018, (best paper award). |
![]() | The Past, Present, and Future of Gaze-enabled Handheld Mobile Devices: Survey and Lessons Learned Inproceedings Proc. International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI), pp. 38:1–38:17, 2018, (best paper honourable mention award). |
![]() | Fixation Detection for Head-Mounted Eye Tracking Based on Visual Similarity of Gaze Targets Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 23:1-23:9, 2018. |
![]() | Error-Aware Gaze-Based Interfaces for Robust Mobile Gaze Interaction Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 24:1-24:10, 2018, (best paper award). |
![]() | Revisiting Data Normalization for Appearance-Based Gaze Estimation Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 12:1-12:9, 2018. |
![]() | A novel approach to single camera, glint-free 3D eye model fitting including corneal refraction Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 9:1-9:9, 2018. |
![]() | Robust Eye Contact Detection in Natural Multi-Person Interactions Using Gaze and Speaking Behaviour Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 31:1-31:10, 2018. |
![]() | Learning to Find Eye Region Landmarks for Remote Gaze Estimation in Unconstrained Settings Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 21:1-21:10, 2018, (best presentation award). |
![]() | Hidden Pursuits: Evaluating Gaze-selection via Pursuits when the Stimulus Trajectory is Partially Hidden Inproceedings Proc. International Symposium on Eye Tracking Research and Applications (ETRA), pp. 27:1-27:5, 2018. |
![]() | VRPursuits: Interaction in Virtual Reality using Smooth Pursuit Eye Movements Inproceedings Proc. International Conference on Advanced Visual Interfaces (AVI), pp. 18:1-18:8, 2018. |
![]() | Detecting Low Rapport During Natural Interactions in Small Groups from Non-Verbal Behaviour Inproceedings Proc. ACM International Conference on Intelligent User Interfaces (IUI), pp. 153-164, 2018. |
![]() | Understanding Face and Eye Visibility in Front-Facing Cameras of Smartphones used in the Wild Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 280:1-280:12, 2018. |
![]() | Which one is me? Identifying Oneself on Public Displays Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 287:1-287:12, 2018, (best paper honourable mention award). |
![]() | Training Person-Specific Gaze Estimators from Interactions with Multiple Devices Inproceedings Proc. ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 624:1-624:12, 2018. |
![]() | A Multimodal Corpus of Expert Gaze and Behavior during Phonetic Segmentation Tasks Inproceedings Proc. Language Resources and Evaluation Conference (LREC), 2018. |
![]() | They are all after you: Investigating the Viability of a Threat Model that involves Multiple Shoulder Surfers Inproceedings Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 31-35, 2017, (best paper honourable mention award). |
![]() | EyeMirror: Mobile Calibration-Free Gaze Approximation using Corneal Imaging Inproceedings Proc. of the International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 279-291, 2017. |
![]() | GazeTouchPIN: Protecting Sensitive Data on Mobile Devices using Secure Multimodal Authentication Inproceedings Proc. of the 19th ACM International Conference on Multimodal Interaction (ICMI), pp. 446-450, 2017. |
![]() | Predicting the Category and Attributes of Visual Search Targets Using Deep Gaze Pooling Inproceedings Proc. of the IEEE International Conference on Computer Vision Workshops (ICCVW), pp. 2740-2748, 2017. |
![]() | GTmoPass: Two-factor Authentication on Public Displays Using GazeTouch passwords and Personal Mobile Devices Inproceedings Proc. of the ACM International Symposium on Pervasive Displays (PerDis), pp. 8:1-8:9, 2017. |
![]() | Everyday Eye Contact Detection Using Unsupervised Gaze Target Discovery Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 193-203, 2017, (best paper honourable mention award). |
![]() | EyeScout: Active Eye Tracking for Position and Movement Independent Gaze Interaction with Large Public Displays Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 155-166, 2017. |
![]() | It’s Written All Over Your Face: Full-Face Appearance-Based Gaze Estimation Inproceedings Proc. of the IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 2299-2308, 2017. |
![]() | ScreenGlint: Practical, In-situ Gaze Estimation on Smartphones Inproceedings Proc. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 2546-2557, 2017. |
![]() | Gaze Embeddings for Zero-Shot Image Classification Inproceedings Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6412-6421, 2017. |
![]() | Noticeable or Distractive? A Design Space for Gaze-Contingent User Interface Notifications Inproceedings Ext. Abstr. of the ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1779-1786, 2017. |
![]() | EyeVote in the Wild: Do Users bother Correcting System Errors on Public Displays? Inproceedings Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 57-62, 2016. |
![]() | Memorability of Cued-Recall Graphical Passwords with Saliency Masks Inproceedings Proc. of the 15th International Conference on Mobile and Ubiquitous Multimedia (MUM), pp. 191-200, 2016. |
![]() | Smooth Eye Movement Interaction Using EOG Glasses Inproceedings Proc. of the International Conference on Multimodal Interaction (ICMI), pp. 307-311 , 2016. |
![]() | Challenges and Design Space of Gaze-enabled Public Displays Inproceedings Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2016), pp. 1736-1745, 2016. |
![]() | A 3D Morphable Eye Region Model for Gaze Estimation Inproceedings Proc. of the European Conference on Computer Vision (ECCV), pp. 297-313, 2016. |
![]() | AggreGaze: Collective Estimation of Audience Attention on Public Displays Inproceedings Proc. of the ACM Symposium on User Interface Software and Technology (UIST), pp. 821-831, 2016, (best paper honourable mention award). |
![]() | TextPursuits: Using Text for Pursuits-Based Interaction and Calibration on Public Displays Inproceedings Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp), pp. 274-285, 2016. |
![]() | Xplore-M-Ego: Contextual Media Retrieval Using Natural Language Queries Inproceedings Proc. of the ACM International Conference on Multimedia Retrieval (ICMR), pp. 243-247, 2016, ISBN: 978-1-4503-4359-6. |
![]() | Attention, please! Comparing Features for Measuring Audience Attention Towards Pervasive Displays Inproceedings Proc. of the ACM SIGCHI Conference on Designing Interactive Systems (DIS), pp. 823-828, 2016, ISBN: 978-1-4503-4031-1. |
![]() | Three-Point Interaction: Combining Bi-manual Direct Touch with Gaze Inproceedings Proc. of the International Working Conference on Advanced Visual Interfaces (AVI), pp. 168-175, 2016. |
![]() | SkullConduct: Biometric User Identification on Eyewear Computers Using Bone Conduction Through the Skull Inproceedings Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1379-1384, 2016, ISBN: 978-1-4503-3362-7. |
![]() | Spatio-Temporal Modeling and Prediction of Visual Attention in Graphical User Interfaces Inproceedings Proc. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 3299-3310, 2016, ISBN: 978-1-4503-3362-7, (best paper honourable mention award). |
![]() | GazeTouchPass: Multimodal Authentication Using Gaze and Touch on Mobile Devices Inproceedings Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 2156-2164, 2016, ISBN: 978-1-4503-4082-3. |
![]() | On the Verge: Voluntary Convergences for Accurate and Precise Timing of Gaze Input Inproceedings Ext. Abstr. of the 34th ACM SIGCHI Conference on Human Factors in Computing Systems (CHI), pp. 1519-1525, 2016, ISBN: 978-1-4503-4082-3. |
![]() | Combining Eye Tracking with Optimizations for Lens Astigmatism in Modern Wide-Angle HMDs Inproceedings Proc. of the IEEE Conference on Virtual Reality (VR), pp. 269-270, 2016. |