@@ -20,13 +20,7 @@
 import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoProgress;
 import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoRequest;
 import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoResponse;
-import com.google.cloud.videointelligence.v1p1beta1.EmotionAttribute;
-import com.google.cloud.videointelligence.v1p1beta1.FaceConfig;
-import com.google.cloud.videointelligence.v1p1beta1.FaceDetectionAnnotation;
-import com.google.cloud.videointelligence.v1p1beta1.FaceDetectionFrame;
-import com.google.cloud.videointelligence.v1p1beta1.FaceSegment;
 import com.google.cloud.videointelligence.v1p1beta1.Feature;
-import com.google.cloud.videointelligence.v1p1beta1.NormalizedBoundingBox;
 import com.google.cloud.videointelligence.v1p1beta1.SpeechRecognitionAlternative;
 import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscription;
 import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscriptionConfig;
@@ -39,7 +33,7 @@
 
 public class Detect {
   /**
-   * Detects face's bounding boxes, emotions, and video transcription using the Video Intelligence
+   * Detects video transcription using the Video Intelligence
    * API
    * @param args specifies features to detect and the path to the video on Google Cloud Storage.
    */
@@ -64,7 +58,7 @@ public static void argsHelper(String[] args) throws Exception {
     System.out.printf(
         "\tjava %s \"<command>\" \"<path-to-video>\"\n"
         + "Commands:\n"
-        + "\tfaces-bounding-boxes | faces-emotions | speech-transcription\n"
+        + "\tspeech-transcription\n"
        + "Path:\n\tA URI for a Cloud Storage resource (gs://...)\n"
        + "Examples: ",
        Detect.class.getCanonicalName());
@@ -73,175 +67,11 @@ public static void argsHelper(String[] args) throws Exception {
     String command = args[0];
     String path = args.length > 1 ? args[1] : "";
 
-    if (command.equals("faces-bounding-boxes")) {
-      analyzeFacesBoundingBoxes(path);
-    }
-    if (command.equals("faces-emotions")) {
-      analyzeFaceEmotions(path);
-    }
     if (command.equals("speech-transcription")) {
       speechTranscription(path);
     }
   }
 
-
-  // [START video_face_bounding_boxes]
-  /**
-   * Detects faces' bounding boxes on the video at the provided Cloud Storage path.
-   *
-   * @param gcsUri the path to the video file to analyze.
-   */
-  public static void analyzeFacesBoundingBoxes(String gcsUri) throws Exception {
-    // Instantiate a com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient
-    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
-      // Set the configuration to include bounding boxes
-      FaceConfig config = FaceConfig.newBuilder()
-          .setIncludeBoundingBoxes(true)
-          .build();
-
-      // Set the video context with the above configuration
-      VideoContext context = VideoContext.newBuilder()
-          .setFaceDetectionConfig(config)
-          .build();
-
-      // Create the request
-      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
-          .setInputUri(gcsUri)
-          .addFeatures(Feature.FACE_DETECTION)
-          .setVideoContext(context)
-          .build();
-
-      // asynchronously perform facial analysis on videos
-      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
-          client.annotateVideoAsync(request);
-
-      System.out.println("Waiting for operation to complete...");
-      boolean faceFound = false;
-      // Display the results
-      for (VideoAnnotationResults results : response.get(900, TimeUnit.SECONDS)
-          .getAnnotationResultsList()) {
-        int faceCount = 0;
-        // Display the results for each face
-        for (FaceDetectionAnnotation faceAnnotation : results.getFaceDetectionAnnotationsList()) {
-          faceFound = true;
-          System.out.println("\nFace: " + ++faceCount);
-          // Each FaceDetectionAnnotation has only one segment.
-          for (FaceSegment segment : faceAnnotation.getSegmentsList()) {
-            double startTime = segment.getSegment().getStartTimeOffset().getSeconds()
-                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
-            double endTime = segment.getSegment().getEndTimeOffset().getSeconds()
-                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
-            System.out.printf("Segment location: %.3fs to %.3f\n", startTime, endTime);
-          }
-          // There are typically many frames for each face,
-          try {
-            // Here we process only the first frame.
-            if (faceAnnotation.getFramesCount() > 0) {
-              FaceDetectionFrame frame = faceAnnotation.getFrames(0); // get the first frame
-              double timeOffset = frame.getTimeOffset().getSeconds()
-                  + frame.getTimeOffset().getNanos() / 1e9;
-              System.out.printf("First frame time offset: %.3fs\n", timeOffset);
-              // print info on the first normalized bounding box
-              NormalizedBoundingBox box = frame.getAttributes(0).getNormalizedBoundingBox();
-              System.out.printf("\tLeft: %.3f\n", box.getLeft());
-              System.out.printf("\tTop: %.3f\n", box.getTop());
-              System.out.printf("\tBottom: %.3f\n", box.getBottom());
-              System.out.printf("\tRight: %.3f\n", box.getRight());
-            } else {
-              System.out.println("No frames found in annotation");
-            }
-          } catch (IndexOutOfBoundsException ioe) {
-            System.out.println("Could not retrieve frame: " + ioe.getMessage());
-          }
-        }
-      }
-
-      if (!faceFound) {
-        System.out.println("No faces detected in " + gcsUri);
-      }
-    }
-  }
-  // [END video_face_bounding_boxes]
-
-  // [START video_face_emotions]
-  /**
-   * Analyze faces' emotions over frames on the video at the provided Cloud Storage path.
-   *
-   * @param gcsUri the path to the video file to analyze.
-   */
-  public static void analyzeFaceEmotions(String gcsUri) throws Exception {
-    // Instantiate a com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient
-    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
-      // Set the configuration to include bounding boxes
-      FaceConfig config = FaceConfig.newBuilder()
-          .setIncludeEmotions(true)
-          .build();
-
-      // Set the video context with the above configuration
-      VideoContext context = VideoContext.newBuilder()
-          .setFaceDetectionConfig(config)
-          .build();
-
-      // Create the request
-      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
-          .setInputUri(gcsUri)
-          .addFeatures(Feature.FACE_DETECTION)
-          .setVideoContext(context)
-          .build();
-
-      // asynchronously perform facial analysis on videos
-      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
-          client.annotateVideoAsync(request);
-
-      System.out.println("Waiting for operation to complete...");
-      boolean faceFound = false;
-      // Display the results
-      for (VideoAnnotationResults results : response.get(600, TimeUnit.SECONDS)
-          .getAnnotationResultsList()) {
-        int faceCount = 0;
-        // Display the results for each face
-        for (FaceDetectionAnnotation faceAnnotation : results.getFaceDetectionAnnotationsList()) {
-          faceFound = true;
-          System.out.println("\nFace: " + ++faceCount);
-          // Each FaceDetectionAnnotation has only one segment.
-          for (FaceSegment segment : faceAnnotation.getSegmentsList()) {
-            double startTime = segment.getSegment().getStartTimeOffset().getSeconds()
-                + segment.getSegment().getStartTimeOffset().getNanos() / 1e9;
-            double endTime = segment.getSegment().getEndTimeOffset().getSeconds()
-                + segment.getSegment().getEndTimeOffset().getNanos() / 1e9;
-            System.out.printf("Segment location: %.3fs to %.3f\n", startTime, endTime);
-          }
-
-          try {
-            // Print each frame's highest emotion
-            for (FaceDetectionFrame frame : faceAnnotation.getFramesList()) {
-              double timeOffset = frame.getTimeOffset().getSeconds()
-                  + frame.getTimeOffset().getNanos() / 1e9;
-              float highestScore = 0.0f;
-              String emotion = "";
-              // Get the highest scoring emotion for the current frame
-              for (EmotionAttribute emotionAttribute : frame.getAttributes(0).getEmotionsList()) {
-                if (emotionAttribute.getScore() > highestScore) {
-                  highestScore = emotionAttribute.getScore();
-                  emotion = emotionAttribute.getEmotion().name();
-                }
-              }
-              System.out.printf("\t%4.2fs: %14s %4.3f\n", timeOffset, emotion, highestScore);
-            }
-
-          } catch (IndexOutOfBoundsException ioe) {
-            System.out.println("Could not retrieve frame: " + ioe.getMessage());
-          }
-        }
-      }
-
-      if (!faceFound) {
-        System.out.println("No faces detected in " + gcsUri);
-      }
-    }
-  }
-  // [END video_face_emotions]
-
   // [START video_speech_transcription]
   /**
    * Transcribe speech from a video stored on GCS.
@@ -268,7 +98,7 @@ public static void speechTranscription(String gcsUri) throws Exception {
           .setVideoContext(context)
           .build();
 
-      // asynchronously perform facial analysis on videos
+      // asynchronously perform speech transcription on videos
       OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
           client.annotateVideoAsync(request);
 
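
For readers skimming this change: the speech-transcription path that survives the deletions follows the same builder pattern as the removed face methods. Below is a minimal, self-contained sketch of that flow assembled from the fragments visible in the hunks above; the class name TranscribeSketch, the placeholder gs:// URI, the en-US language code, the 600-second timeout, and the result-printing loop are illustrative assumptions rather than part of this commit.

import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoProgress;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoRequest;
import com.google.cloud.videointelligence.v1p1beta1.AnnotateVideoResponse;
import com.google.cloud.videointelligence.v1p1beta1.Feature;
import com.google.cloud.videointelligence.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscription;
import com.google.cloud.videointelligence.v1p1beta1.SpeechTranscriptionConfig;
import com.google.cloud.videointelligence.v1p1beta1.VideoAnnotationResults;
import com.google.cloud.videointelligence.v1p1beta1.VideoContext;
import com.google.cloud.videointelligence.v1p1beta1.VideoIntelligenceServiceClient;
import java.util.concurrent.TimeUnit;

// Hypothetical standalone class; the real sample keeps this logic in Detect.speechTranscription.
public class TranscribeSketch {
  public static void main(String[] args) throws Exception {
    // Placeholder URI for illustration only.
    String gcsUri = args.length > 0 ? args[0] : "gs://your-bucket/your-video.mp4";
    try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
      // Configure transcription; the language code is assumed to be en-US here.
      SpeechTranscriptionConfig config = SpeechTranscriptionConfig.newBuilder()
          .setLanguageCode("en-US")
          .build();
      VideoContext context = VideoContext.newBuilder()
          .setSpeechTranscriptionConfig(config)
          .build();

      // Build the request for the video stored on Cloud Storage.
      AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
          .setInputUri(gcsUri)
          .addFeatures(Feature.SPEECH_TRANSCRIPTION)
          .setVideoContext(context)
          .build();

      // Asynchronously perform speech transcription on the video.
      OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> response =
          client.annotateVideoAsync(request);
      System.out.println("Waiting for operation to complete...");

      // Print the top alternative of each transcription (timeout value is an assumption).
      for (VideoAnnotationResults results :
          response.get(600, TimeUnit.SECONDS).getAnnotationResultsList()) {
        for (SpeechTranscription transcription : results.getSpeechTranscriptionsList()) {
          if (transcription.getAlternativesCount() > 0) {
            SpeechRecognitionAlternative alternative = transcription.getAlternatives(0);
            System.out.printf("Transcript: %s%n", alternative.getTranscript());
            System.out.printf("Confidence: %.2f%n", alternative.getConfidence());
          }
        }
      }
    }
  }
}

As with the removed face samples, annotateVideoAsync returns a long-running operation, so the caller blocks on response.get(...) rather than polling the operation by hand.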