From d17ab60dffb12ca5c38955e9e66729fc0fa0b8a1 Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Sun, 16 Jun 2019 10:17:33 -0500 Subject: [PATCH 01/12] Initial push for pcap support in tensorflow io. Still untested. --- tensorflow_io/pcap/BUILD | 20 ++ tensorflow_io/pcap/__init__.py | 32 ++++ tensorflow_io/pcap/kernels/pcap_input.cc | 212 ++++++++++++++++++++++ tensorflow_io/pcap/ops/pcap_ops.cc | 46 +++++ tensorflow_io/pcap/python/__init__.py | 0 tensorflow_io/pcap/python/ops/__init__.py | 0 tensorflow_io/pcap/python/ops/pcap_ops.py | 74 ++++++++ tests/test_pcap.py | 154 ++++++++++++++++ tests/test_pcap/http.pcap | Bin 0 -> 25803 bytes 9 files changed, 538 insertions(+) create mode 100644 tensorflow_io/pcap/BUILD create mode 100644 tensorflow_io/pcap/__init__.py create mode 100644 tensorflow_io/pcap/kernels/pcap_input.cc create mode 100644 tensorflow_io/pcap/ops/pcap_ops.cc create mode 100644 tensorflow_io/pcap/python/__init__.py create mode 100644 tensorflow_io/pcap/python/ops/__init__.py create mode 100644 tensorflow_io/pcap/python/ops/pcap_ops.py create mode 100644 tests/test_pcap.py create mode 100644 tests/test_pcap/http.pcap diff --git a/tensorflow_io/pcap/BUILD b/tensorflow_io/pcap/BUILD new file mode 100644 index 000000000..c6cd9bf9d --- /dev/null +++ b/tensorflow_io/pcap/BUILD @@ -0,0 +1,20 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "pcap_ops", + srcs = [ + "kernels/pcap_input.cc", + "ops/pcap_ops.cc", + ], + copts = [ + "-pthread", + "-std=c++11", + "-DNDEBUG", + ], + linkstatic = True, + deps = [ + "//tensorflow_io/core:dataset_ops", + ], +) diff --git a/tensorflow_io/pcap/__init__.py b/tensorflow_io/pcap/__init__.py new file mode 100644 index 000000000..6d7cbcb2b --- /dev/null +++ b/tensorflow_io/pcap/__init__.py @@ -0,0 +1,32 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""PcapInput/PcapOutput + +@@PcapOutputSequence +@@PcapDataset +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "PcapOutputSequence", + "PcapDataset", +] + +remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc new file mode 100644 index 000000000..127802819 --- /dev/null +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -0,0 +1,212 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "kernels/dataset_ops.h" +#include "tensorflow/core/lib/io/buffered_inputstream.h" + +namespace tensorflow { +namespace data { + +class PcapInputStream : public io::BufferedInputStream { +public: + + const uint32_t MAGIC_NUMBER = 0xA1B2C3D4; // magic number for big endian machine with microsecond resolution + const uint32_t MAGIC_NUMBER_REVERSED = 0xD4C3B2A1; // magic number for little endian machine with microsecond resolution + + const uint16_t PCAP_ERRBUF_SIZE = 256; + const uint16_t PCAP_TSTAMP_PRECISION_MICRO = 0; // use timestamps with microsecond precision by default + + explicit PcapInputStream(InputStreamInterface* input_stream) + : io::BufferedInputStream(input_stream, 256 * 1024) { + } + + Status ReadRecord(double& timestamp, string* packet_data, int64& record_read) { + string buffer; + buffer.clear(); + + // read packet header + TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct pcapPacketHeader), &buffer)); + struct pcapPacketHeader *header = (struct pcapPacketHeader *)buffer.data(); + + if (reverse_header_byte_order) { + // switch byte order to get accurate representation of field values + EndianSwap(header->ts_sec); + EndianSwap(header->ts_msec); + EndianSwap(header->caplen); + } + + // Combine date and time in seconds plus milliseconds offset into one composite value + timestamp = header->ts_sec + (header->ts_msec / 1e6); + + // read packet data + TF_RETURN_IF_ERROR(ReadNBytes(header->caplen, packet_data)); + + record_read = 1; // this method reads one packet at a time from the input buffer + + return Status::OK(); + } + + Status ReadHeader() { + string buffer; + // read file header + TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct PcapFileHeader), &buffer)); + struct PcapFileHeader *header = (struct PcapFileHeader *)buffer.data(); + + if(!ValidateMagicNumber(header->magic_number)) + return errors::InvalidArgument("PCAP file must starts with a standard magic number."); + } + + if (reverse_header_byte_order) { + // switch byte order to get accurate representation of packet slices + // snaplen will be needed to reconstruct sliced payloads spread across multiple pcap packets + EndianSwap(header->snaplen) + } + return Status::OK(); + } + +private: + + /** + PcapFileHeader format: (https://wiki.wireshark.org/Development/LibpcapFileFormat) + magic_number: used to detect the file format itself and the byte ordering. The writing application writes 0xa1b2c3d4 with it's native byte ordering format into this field. The reading application will read either 0xa1b2c3d4 (identical) or 0xd4c3b2a1 (swapped). If the reading application reads the swapped 0xd4c3b2a1 value, it knows that all the following fields will have to be swapped too. For nanosecond-resolution files, the writing application writes 0xa1b23c4d, with the two nibbles of the two lower-order bytes swapped, and the reading application will read either 0xa1b23c4d (identical) or 0x4d3cb2a1 (swapped). + version_major, version_minor: the version number of this file format (current version is 2.4) + thiszone: the correction time in seconds between GMT (UTC) and the local timezone of the following packet header timestamps. Examples: If the timestamps are in GMT (UTC), thiszone is simply 0. If the timestamps are in Central European time (Amsterdam, Berlin, ...) which is GMT + 1:00, thiszone must be -3600. In practice, time stamps are always in GMT, so thiszone is always 0. + sigfigs: in theory, the accuracy of time stamps in the capture; in practice, all tools set it to 0 + snaplen: the "snapshot length" for the capture (typically 65535 or even more, but might be limited by the user), see: incl_len vs. orig_len below + linktype: link-layer header type, specifying the type of headers at the beginning of the packet (e.g. 1 for Ethernet, see tcpdump.org's link-layer header types page for details); this can be various types such as 802.11, 802.11 with various radio information, PPP, Token Ring, FDDI, etc. + */ + struct PcapFileHeader { + uint32_t magic_number; + uint16_t version_major; + uint16_t version_minor; + int32_t thiszone; + uint32_t sigfigs; + uint32_t snaplen; + uint32_t linktype; + }; + + + /** + PacketHeader format: + ts_sec: the date and time when this packet was captured. This value is in seconds since January 1, 1970 00:00:00 GMT; this is also known as a UN*X time_t. You can use the ANSI C time() function from time.h to get this value, but you might use a more optimized way to get this timestamp value. If this timestamp isn't based on GMT (UTC), use thiszone from the global header for adjustments. + ts_msec: in regular pcap files, the microseconds when this packet was captured, as an offset to ts_sec. In nanosecond-resolution files, this is, instead, the nanoseconds when the packet was captured, as an offset to ts_sec /!\ Beware: this value shouldn't reach 1 second (in regular pcap files 1 000 000; in nanosecond-resolution files, 1 000 000 000); in this case ts_sec must be increased instead! + caplen: the number of bytes of packet data actually captured and saved in the file. This value should never become larger than orig_len or the snaplen value of the global header. + orig_len: the length of the packet as it appeared on the network when it was captured. If incl_len and orig_len differ, the actually saved packet size was limited by snaplen. + */ + struct PacketHeader + { + uint32_t ts_sec; + uint32_t ts_msec; + uint32_t caplen; + uint32_t orig_len; + }; + + inline void EndianSwap(uint16_t& x) + { + x = (x>>8) | + (x<<8); + } + + inline void EndianSwap(int32_t& x) + { + x = (x>>24) | + ((x<<8) & 0x00FF0000) | + ((x>>8) & 0x0000FF00) | + (x<<24); + } + + bool reverse_header_byte_order = false; // is the pcap file using little endian or big endian byte order + + /** + Check for the magic numbers of the two most typical pcap formats used in practice. + */ + bool ValidateMagicNumber(uint32_t magic) { + if (magic_number == MAGIC_NUMBER) { + return true; + } else if (magic_number == MAGIC_NUMBER_REVERSED) { + reverse_header_byte_order = true; + return true; + } else { + return false; + } + } + +}; // end of class PcapInputStream + + +class PcapInput: public FileInput { + public: + Status ReadRecord(io::InputStreamInterface* s, IteratorContext* ctx, std::unique_ptr& state, int64 record_to_read, int64* record_read, std::vector* out_tensors) const override { + if (state.get() == nullptr) { + state.reset(new PcapInputStream(s)); + TF_RETURN_IF_ERROR(state.get()->ReadHeader()); + } + + // Let's allocate enough space for Tensor, if more than read, replace. + // The output tensor has two columns (packet_timestamp,packet_data). + // Hence the shape of the output tensor is (record_to_read,2) unless there are less than record_to_read packets left in the file + Tensor tensor_packet_ts(ctx->allocator({}), DT_DOUBLE, {record_to_read}); // Tensor column for packet timestamps + out_tensors->emplace_back(std::move(tensor_packet_ts)); // add timestamp column to the output tensor + Tensor tensor_packet_data(ctx->allocator({}), DT_STRING, {record_to_read}); // Tensor column for packet data + out_tensors->emplace_back(std::move(tensor_packet_data)); // add data column to the output tensor + + // read packets from the file up to record_to_read or end of file + while ((*record_read) < record_to_read) { + int64 record_count = 0; + double packet_timestamp; + string packet_data_buffer; + TF_RETURN_IF_ERROR(state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count)); + if (record_count > 0) { + Tensor timestamp_tensor = (*out_tensors)[0]; + timestamp_tensor->flat()(record_read) = packet_timestamp; + Tensor data_tensor = (*out_tensors)[1]; + timestamp_tensor.flat()(record_read) = std::move(packet_data_buffer) + (*record_read) += record_count; + } else { + // no more records available to read + // record_count == 0 + break; + } + } + if (*record_read < record_to_read) { + if (*record_read == 0) { + out_tensors->clear(); + } + for (size_t i = 0; i < out_tensors->size(); i++) { + Tensor tensor = (*out_tensors)[i].Slice(0, *record_read); + (*out_tensors)[i] = std::move(tensor); + } + } + return Status::OK(); + } + Status FromStream(io::InputStreamInterface* s) override { + return Status::OK(); + } + void EncodeAttributes(VariantTensorData* data) const override { + } + bool DecodeAttributes(const VariantTensorData& data) override { + return true; + } + protected: +}; + +REGISTER_UNARY_VARIANT_DECODE_FUNCTION(PcapInput, "tensorflow::data::PcapInput"); + +REGISTER_KERNEL_BUILDER(Name("PcapInput").Device(DEVICE_CPU), + FileInputOp); +REGISTER_KERNEL_BUILDER(Name("PcapDataset").Device(DEVICE_CPU), + FileInputDatasetOp); +} // namespace data +} // namespace tensorflow diff --git a/tensorflow_io/pcap/ops/pcap_ops.cc b/tensorflow_io/pcap/ops/pcap_ops.cc new file mode 100644 index 000000000..4fb9903f0 --- /dev/null +++ b/tensorflow_io/pcap/ops/pcap_ops.cc @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("PcapInput") + .Input("source: string") + .Output("handle: variant") + .Attr("filters: list(string) = []") + .Attr("columns: list(string) = []") + .Attr("schema: string = ''") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->MakeShape({c->UnknownDim()})); + return Status::OK(); + }); + +REGISTER_OP("PcapDataset") + .Input("input: T") + .Input("batch: int64") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("T: {string, variant} = DT_VARIANT") + .SetIsStateful() + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->MakeShape({})); + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow_io/pcap/python/__init__.py b/tensorflow_io/pcap/python/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tensorflow_io/pcap/python/ops/__init__.py b/tensorflow_io/pcap/python/ops/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py new file mode 100644 index 000000000..5786cd4a2 --- /dev/null +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -0,0 +1,74 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""PcapInput/PcapOutput.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from tensorflow import dtypes +from tensorflow.compat.v1 import data +from tensorflow.python.framework import ops +from tensorflow_io.core.python.ops import core_ops + +# @tf_export("PcapDataset") +class PcapDataset(data.Dataset): + """A pcap Dataset. Pcap is a popular file format for capturing network packets. + """ + + def __init__(self, filenames, batch=None): + """Create a pcap Reader. + + Args: + filenames: A `tf.string` tensor containing one or more filenames. + """ + super(PcapDataset, self).__init__() + self._filenames = ops.convert_to_tensor( + filenames, dtype=dtypes.string, name="filenames") + self._batch = 0 if batch is None else batch + + def _inputs(self): + return [] + + def _as_variant_tensor(self): + return text_ops.text_dataset( + self._data_input, + self._batch, + output_types=self.output_types, + output_shapes=self.output_shapes) + + def _as_variant_tensor(self): + return parquet_ops.parquet_dataset( + self._data_input, + self._batch, + output_types=self.output_types, + output_shapes=self.output_shapes) + + @property + def output_classes(self): + return tuple([tf.Tensor for _ in range(2)]) # 2 columns - packet ts and data + + @property + def output_shapes(self): + return tuple( + [tf.TensorShape([]) for _ in self._columns] + ) if self._batch is None else \ + tuple([tf.TensorShape([None]) for _ in range(2)] # 2 columns - packet timestamp and data + ) + + @property + def output_types(self): + return tuple([dtypes.string, dtypes.float64]) + diff --git a/tests/test_pcap.py b/tests/test_pcap.py new file mode 100644 index 000000000..120d13a4a --- /dev/null +++ b/tests/test_pcap.py @@ -0,0 +1,154 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +""" +This test expands on the DPKT HTTP example and the TF IO test_text code. +https://github.com/deeprtc/dpkt/blob/master/examples/print_http_requests.py +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import pytest +import tensorflow as tf + +tf.compat.v1.disable_eager_execution() + +from tensorflow import errors # pylint: disable=wrong-import-position +import tensorflow_io.text as text_io # pylint: disable=wrong-import-position + +import dpkt +import datetime +import socket +from dpkt.compat import compat_ord + + +def mac_addr(address): + """Convert a MAC address to a readable/printable string + + Args: + address (str): a MAC address in hex form (e.g. '\x01\x02\x03\x04\x05\x06') + Returns: + str: Printable/readable MAC address + """ + return ':'.join('%02x' % compat_ord(b) for b in address) + + +def inet_to_str(inet): + """Convert inet object to a string + + Args: + inet (inet struct): inet network address + Returns: + str: Printable/readable IP address + """ + # First try ipv4 and then ipv6 + try: + return socket.inet_ntop(socket.AF_INET, inet) + except ValueError: + return socket.inet_ntop(socket.AF_INET6, inet) + + +def print_http_requests(pcap): + """Print out information about each packet in a pcap + + Args: + pcap: dpkt pcap reader object (dpkt.pcap.Reader) + """ + # For each packet in the pcap process the contents + for timestamp, buf in pcap: + + # Unpack the Ethernet frame (mac src/dst, ethertype) + eth = dpkt.ethernet.Ethernet(buf) + + # Make sure the Ethernet data contains an IP packet + if not isinstance(eth.data, dpkt.ip.IP): + print('Non IP Packet type not supported %s\n' % eth.data.__class__.__name__) + continue + + # Now grab the data within the Ethernet frame (the IP packet) + ip = eth.data + + # Check for TCP in the transport layer + if isinstance(ip.data, dpkt.tcp.TCP): + + # Set the TCP data + tcp = ip.data + + # Now see if we can parse the contents as a HTTP request + try: + request = dpkt.http.Request(tcp.data) + except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError): + continue + + # Pull out fragment information (flags and offset all packed into off field, so use bitmasks) + do_not_fragment = bool(ip.off & dpkt.ip.IP_DF) + more_fragments = bool(ip.off & dpkt.ip.IP_MF) + fragment_offset = ip.off & dpkt.ip.IP_OFFMASK + + # Print out the info + print('Timestamp: ', str(datetime.datetime.utcfromtimestamp(timestamp))) + print('Ethernet Frame: ', mac_addr(eth.src), mac_addr(eth.dst), eth.type) + print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d)' % + (inet_to_str(ip.src), inet_to_str(ip.dst), ip.len, ip.ttl, do_not_fragment, more_fragments, + fragment_offset)) + print('HTTP request: %s\n' % repr(request)) + + # Check for Header spanning acrossed TCP segments + if not tcp.data.endswith(b'\r\n'): + print('\nHEADER TRUNCATED! Reassemble TCP segments!\n') + + +def test_http_pcap(): + """Open up a test pcap file and print out the packets""" + with open('test_pcap/http.pcap', 'rb') as f: + pcap = dpkt.pcap.Reader(f) + print_http_requests(pcap) + + +def test_pcap_input(): + """test_pcap_input + """ + pcap_filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "test_pcap", "http.pcap") + + with open(pcap_filename, 'rb') as f: + pcapr = dpkt.pcap.Reader(f) + # read packets from test pcap file in a memory array + pcap = [p for p in pcapr] + + pcap = pcap*2 + + file_url = "file://" + pcap_filename + url_filenames = [file_url, file_url] + dataset = text_io.PcapDataset(url_filenames, batch=2) + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + with tf.compat.v1.Session() as sess: + sess.run(init_op) + for i in range(0, len(pcap), 2) + v = sess.run(get_next) + for j in range(2) + orig_timestamp, orig_buf = pcap[i+j] + sample_timestamp, sample_buf = v[j] + assert orig_timestamp == sample_timestamp + assert orig_buf == sample_buf + with pytest.raises(errors.OutOfRangeError): + sess.run(get_next) + + +if __name__ == "__main__": + test.main() diff --git a/tests/test_pcap/http.pcap b/tests/test_pcap/http.pcap new file mode 100644 index 0000000000000000000000000000000000000000..54f6f2953d5553eddc5af6bc29e29b49ceb864ba GIT binary patch literal 25803 zcmeHQdwdkt^&beJMxw!25&gk+F&JcbXE*OR+aNbFXjh*nICuW)x%o96ib~BjCg2nHy$2{ro7V4zBU{>!XC;$L~&N zni7R)4+jK>Ztg{*Y&7TosRgeI`7>ghpB_c8q11k)uXGF=E81krx8B z7Ct9RO@D_@Q>Vr-GZ=QvXI7+y6d6~ZbzBE`BTzgmbjCSvT>b&2=8LCR25-XO6y6F? zZ=$>%OycbRNyke{O^Add9DK8R4 zkz-5k4pKNrkbst2O?s+{aZP4*lTy30n(&gX$6Za7{wk-vY$|Exy?ruU8M9nH|+WGbUqZMz4l zSe==uOv_~$Q?CdJopjC2%WZ-It0LWju$4HANhcSMBwj(-4$c&2&&}0$o-?18OZ!yTbx1sOws-rKh{aR$-Z5 zms@i|Q&(g6JXbTQLr}~m#MRT#(cVZbHkO@J+{m&`-A!a}TX$y%0TRTmDS}sH+2&am zc-8`6vV+*rX;bMpcAw_609Cw4fB{LZwZ!S!K(Sp`{H(LGvQlqn!H23u7`a*tFIh@pgG_^h&nNPh=&P`I@Hj^T3g266VJ&qvLeOt=D9V1>Lg;AK zTFikY0XYa1RK*MAf~*d5VEumCR#sxu6i!m@0B;t;i5jqEzN)OmLIS+t55P^huB(BT ze)yM*W0chl*_mt`T#J0KW=ZZttR0HIC@N?e_oBq_N(mK7tz{q}XuJ(a70f}B2NW(; zZK;dtMW8g+5VRycGYa-WA2%FmDmjBbNWm{iHu?luuOl>A{S5_)1A#v^y|NM{5*Gw5 zRe4VF22e+}fb6TagkY+K(WbbaU^=FX;>^i<{esOSNNhke+Q+(TyaIIwl)?N2y^nqZ z1`-t)*+FZGY8)X2fcH=Sc|;WNlbr&l@bnhyT6~=76de70Tu1LgbS)IXdy`5QykAH0 z-jRs+y$8)%W5=ZoWA+00HdR~iHJzO-b#6L@RaE;sm{VDysNJ)@1v z1O8rJ!{S#7O$A9)WM7yXl%X)~akqv+(u5XdY&abFWUmSxXp+5Q^r@&PI*ivw2SF56 z4GDGN-{T?B%PllUpr!uO;~|jj8A7LGz9J=9!-C+_NLJ$d(+YO7!izko@?bGty}g20 z;6yUpxRs7WolZt!PnOX^0Jj~mtP3xR3$9UH(+=2>sKG)+GO3v=a9q?x-p_#t(t&@- zELju0yiO-!&@QmqNCOWu52^Q&lCp}yG6%8QU{K>QoAY@@+1rP9qIwk}6pu*k0=*g@oNvBY6hHwrtLURjit`MSG><4^`a#l;* zlH%T^&o8JRq*?NzswL}~sbK(ifg%%Z(Fx`i7y|Ga1@Km~S}%HTI*xY-coH>lRSqj& zp1?|kCq{e10S|(tYT%;7f`xzx&1Jt8;J`=PI1RerAxPmtwuSe}3TLHl+_K0if=awF zA9#3Z7Y0JIM8Nc6K?ODx^>YV!4@}h>kIjITps6G%D2lA8b{H1~tw2egTW75iSOdqi zL}wqDn^n0{6pmZXn3FagaUY3tmyjYafT#?$%w9p{ReLB9n(kNRaA-bQR8eUuIJ8s^Td<8zlj!lx-xk3W}uSjsS#7z-(ioWw>3Ws4EW zpvOq&_;pz)@Aug?P$6Ejb6z{Av4Yeqs}VIuH*UBnpUGOGlK~wt3>7T`?%B8Zj;9G!?#J{#DTa;@!t;0O{C6}}VcV2#)%usr zB4)xEeK}!#H-_uue3F_2O3rMXi)#sB@hWE2u1~51NuRygm&=!qVorj&1Amj3Gb4|j z-idS9y(;ztD0uUxF|M)~CKlTJ+{;|!EfuFiB%iLQM{2#C4W^)k^a!RT zra&=-PYyznPDM#cNpVTBqo|^EiWr3Q9Y;k`Nm;q0$T3AvKGa4t4^yDL2QtRBSPwG= zk^^v|g$j!63yNCc|H-9Sa0|0d(Gt2TM!LdZtLGw&&zMK3)sqOu9ZX8p>KO^+4!%oI zZkR;$5)=JiN^PF8uuGp!__k0FDhh4?hJ||Zx`H)dXckW%&YH!WMP0hTq`GuV_Ml5Q z$ELeDEbLsAJ8g%JmQRpF}+R**AB!b7~vc+hmoqJHsM3g3KKx_24qvJP383 zK0d_j89#tTUXeJgU=#(90$D+LgCKduFc#U7)(*kLwsy~)X2q{VkPWkvS&$ZU&+2qp ziK}sTvz}0Foz+9?n`XJ~KxdvDh9qKy1h{^9xlBxGj?Y#iQ$a{Q0{6U`!Nm79!+l!k zY$O-;%CdwqKxa}JAXE@#CT?Vy zvlpfzdx|UO^Ux$D$Q*NBQB<(hwO}gUr6`83CkCfarM+(8yC5SmoF3y0?%_R_!jX~H`TLx}f6=h$pW{S%7R7bhb1iZ(Ng z_w4BmlSYuHk=cEeFPgG~>^#)9VkR_Bg!)bFrZufbq$7msBut=jQ)kjAWDmPhOnc-^s!Y~h|zGk$0 z3RIp#EZ^q(>44hd9o$c@`jd07@R9=pJh@sXdzN z$9f_*thBRX&S%_&8WY&GW*sIwkF+0IoRLh~<#Ya&w3sJ9X4{Kb& z&ilgIN%jC(XYDJ`L*iWY@Sv7Y%vWtf&%A@zJ@YdcW=*a=K=FQRBHpjsWP0Y$FJ~Cg7HXnk z^#teo!XP>?MmT|bWmstnRaa0*#lvP9TE=vdO1BDt`{n1STCrIo-k*3gbUN%s4%pEH z+fMX9&f_x3VK@#NE~7y-Mynl4B2F*1x-Uf6?|K$SmMBdp!k~_kO!DC;JK%< zqdtyQ0&;;STmZFz^=d%mBczqUwlh(dpnkT-tA+=9$>p4OCGmk}LO%xtZyz!o?yJ+Q z2KDmp6c^DPS2tT+=`a#=g=U_v++Kvf=`fs29PJbi*lQ_&>}*#jqMyOP$$fOhl0f>{ zI?)L@rEq<`Tkh4Ua+o*tehi;;h|q)wZBQvk$x=<>CDtp;eLQUafem-)8y8#Z8tI*= zeHKDe2teQQTqwlFCg;4&^i&j!x1Y1b7;}C-B zuFy`zFZlFqy`>InlcBDS+MW=ociX169D38Nh{aA(5eEJoH&!7a`)jX?tZujhZ zIy%D^DNI1<(Y0WWD6PS#a@Fq+QJyi7gDKSX=rCQSQ!$r0DjPz)cdj`tT2Gx6Y|Up1 zmsc=M`Ix8LF*r$st**Rj7@RD7fgEw=FWNI^6~kNvRKEB)Yl_ZAXN^6nv&Jp?IBT>* z5EZFR>Ca(a9nH|+WL`!;xBPYF`?EW_7ibrsm6t%Uut@O{D!Y_FRco zuAbTOcQ$FCg`XQ?qa|!fOAGHAmsxi+~EcD}`4E}Hu}i^_|RwfD3{-#wa_JL$%gX~=!#(bwkeQ{S)1KYHzT z%U=(+uNiY*PGspb&d3{2bS^q@fBCar<(mtBx8>wle&jp3^YiEb@bdT&@!hifwbyRv z4()oN|L{|ruUa|r(`(s#*BrX)lCH`Fhp&}NuHC$2PuZ*YpYd7Ynbx0xjni|945j9lu&R-u=wV>l*xO9*AMRf>EOkO z!u$HH4^7#~IuHG#y6Ksvf4pe<1qC~Pv!h{P^CMT? zJCE%izx0cmRnJ}5I?v(rH=G5lj;;OvkCGKzUO{_Wa=Xtnlm+4~R?de|QV` zeEu`*C%yF2T@S3Z`jpU1CCrAsYaT!N{^QK1DsI7wo9-R8FSMoYz6od7{_^oxx8A#} ze9@jQ?{$9X?0qNyaQrdz4iMAJFag1u)B0y-Hlt;mw;bGe{{90`9q#z(X#4C}UjKB%Gt2F79?g4t=h%Ur zr>u*7?;m%3Frr<0HT3zf&$WN@;CBxE>1Y3Y(XEbv6uM}_FS;*&YX*CG;I7p(N^4%; zQn>Txt4e3gCqMWjd(ZmY>{~~^dW~|!*N=_5^`D>Qd{jI0^Yi}syAj*hoiU1e2hat7ld+xo)$*N) z?N6SOi0$(x(0K;1efLyIgst^{%s%k+*oUh3KK$bwR=)V&`QeWrD){Rahqs@4?M=J? zxpc*fX}1*2xc|L~Yv<-|x5>YJs333Z&ExLBaGUGhX|_8)T6W0|kxQ>Q*nfwzWM%NV z>ld7IIFNhxk@qhDM0=52B1TPo(k!4^1Hfh)&ki zo9ml8o6R6dwBiglLnk>YX@dh@;QxSQLcI~=dWh~w+>DD(l4*ZnFVnpL$+`SV%9$NKS`m&FAT$;8!K3FjE8}Fv^PVjpM!=vl$y6C{IQRlT}iVM2~}}9Ii;FiM=L0v)bp93Q9C-G^1*O zNd4rgCuhU^%jmjicj~(5HVxN3^>^ub|5nk-+3 z>SDX{FN{@}*-M8KrA{?rL!+XhE7dPhy#RRQ2W=28=fqHeGjI>ON;4v;Zs`P<-V8*T z39&_--MSO+4c~aF;hIr>W1}Mm8sIzVuXZgfD~3(Nu{p0DY4NL zKg|`{A(umJ<{Pu;b23^A#x+1rQPDs-10rwY9a30sB^4QZ77yYX1P)sh!Te0CGsIp& z3l1u2f}WFLdojVNB($3tN{2f(VSKguX5pl6 zT%GzZaD+9zB1Kf?*rrC@n;4hhGY>q`x3#zTk_gm@^uggnMD7JCMF(?7FyJzV(Cf{c2B~fXfpg0-D8e7HMz<(EE~A-m)Cb+X?{jSv(%ZN(tS%o z+T2$eG8Ld#mNl%Tp|uc~JFg_onB8dxcM?lg%*q;we_;$3@;{zPJX3M%o9ck3=27W@ z>h(FQu_ZsA+VISL5xCekI7-l70$PP1kUHk+qC+-3dg!tPy&}8Q0R&r5070#9S{y+r zH2Q`dLe5#VmD$TLusW>Ue;egX@7E#d%J^i5QXZOH44NhM;dycbhuLdc(|Hc%wrI7L_pFp$io9jA+y~Y7T_?FgwfIS zZb_pc3 zd#+KSmX$!%Lur?&g}TBXNEGP|f}I$gv=~)ZSxL>|L>m?X1P`n;bF;O#!HNJJ6Jt>qTaOXW1Eshz zpC1;p;6cTE_Q~NE2wo{_mVisz?~Sz}Y+-`n4Q>I#kdNI6C$_-C)+;NDUcINXc=R`2 zEJ78zANE4gYr1mP0x+H9VTD*|OA67|s`1o8)TV~@)I~c1+^6(&YGPms76pR%{}Wd- zI)x|b!-lPd6ZGNue0VFIqmL)>$JRlterP%@R3V^Umy6Q=V&v#-Ud0vEtJsz5Rcvf? zi3xC5Dr(=TlzZPQyz( z Date: Sun, 16 Jun 2019 12:23:59 -0500 Subject: [PATCH 02/12] updates to finalize the initial version of pcap dataset --- tensorflow_io/pcap/BUILD | 8 +- tensorflow_io/pcap/python/ops/pcap_ops.py | 28 +++-- tests/test_pcap.py | 123 ++-------------------- 3 files changed, 28 insertions(+), 131 deletions(-) diff --git a/tensorflow_io/pcap/BUILD b/tensorflow_io/pcap/BUILD index c6cd9bf9d..7bc561347 100644 --- a/tensorflow_io/pcap/BUILD +++ b/tensorflow_io/pcap/BUILD @@ -2,8 +2,8 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:public"]) -cc_library( - name = "pcap_ops", +cc_binary( + name = "python/ops/_pcap_ops.so", srcs = [ "kernels/pcap_input.cc", "ops/pcap_ops.cc", @@ -13,8 +13,10 @@ cc_library( "-std=c++11", "-DNDEBUG", ], - linkstatic = True, + linkshared = 1, deps = [ "//tensorflow_io/core:dataset_ops", + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", ], ) diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index 5786cd4a2..3127e0a13 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -21,7 +21,8 @@ from tensorflow import dtypes from tensorflow.compat.v1 import data from tensorflow.python.framework import ops -from tensorflow_io.core.python.ops import core_ops + +pcap_ops = _load_library('_pcap_ops.so') # @tf_export("PcapDataset") class PcapDataset(data.Dataset): @@ -34,23 +35,17 @@ def __init__(self, filenames, batch=None): Args: filenames: A `tf.string` tensor containing one or more filenames. """ - super(PcapDataset, self).__init__() - self._filenames = ops.convert_to_tensor( - filenames, dtype=dtypes.string, name="filenames") + self._data_input = audio_ops.wav_input(filenames) self._batch = 0 if batch is None else batch + super(PcapDataset, self).__init__() + + def _inputs(self): return [] def _as_variant_tensor(self): - return text_ops.text_dataset( - self._data_input, - self._batch, - output_types=self.output_types, - output_shapes=self.output_shapes) - - def _as_variant_tensor(self): - return parquet_ops.parquet_dataset( + return pcap_ops.pcap_dataset( self._data_input, self._batch, output_types=self.output_types, @@ -58,17 +53,18 @@ def _as_variant_tensor(self): @property def output_classes(self): - return tuple([tf.Tensor for _ in range(2)]) # 2 columns - packet ts and data + # we output a tensor for packet timestamp and one for packet data + return tuple(tf.Tensor, tf.Tensor) @property def output_shapes(self): return tuple( [tf.TensorShape([]) for _ in self._columns] - ) if self._batch is None else \ - tuple([tf.TensorShape([None]) for _ in range(2)] # 2 columns - packet timestamp and data + ) if self._batch is None else tuple( + [tf.TensorShape([None]), tf.TensorShape([None])] ) @property def output_types(self): - return tuple([dtypes.string, dtypes.float64]) + return tuple([dtypes.float64, dtypes.string]) diff --git a/tests/test_pcap.py b/tests/test_pcap.py index 120d13a4a..d2d23fec8 100644 --- a/tests/test_pcap.py +++ b/tests/test_pcap.py @@ -13,141 +13,40 @@ # the License. # ============================================================================== """ -This test expands on the DPKT HTTP example and the TF IO test_text code. -https://github.com/deeprtc/dpkt/blob/master/examples/print_http_requests.py +Test PcapDataset """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os -import pytest import tensorflow as tf tf.compat.v1.disable_eager_execution() -from tensorflow import errors # pylint: disable=wrong-import-position -import tensorflow_io.text as text_io # pylint: disable=wrong-import-position - -import dpkt -import datetime -import socket -from dpkt.compat import compat_ord - - -def mac_addr(address): - """Convert a MAC address to a readable/printable string - - Args: - address (str): a MAC address in hex form (e.g. '\x01\x02\x03\x04\x05\x06') - Returns: - str: Printable/readable MAC address - """ - return ':'.join('%02x' % compat_ord(b) for b in address) - - -def inet_to_str(inet): - """Convert inet object to a string - - Args: - inet (inet struct): inet network address - Returns: - str: Printable/readable IP address - """ - # First try ipv4 and then ipv6 - try: - return socket.inet_ntop(socket.AF_INET, inet) - except ValueError: - return socket.inet_ntop(socket.AF_INET6, inet) - - -def print_http_requests(pcap): - """Print out information about each packet in a pcap - - Args: - pcap: dpkt pcap reader object (dpkt.pcap.Reader) - """ - # For each packet in the pcap process the contents - for timestamp, buf in pcap: - - # Unpack the Ethernet frame (mac src/dst, ethertype) - eth = dpkt.ethernet.Ethernet(buf) - - # Make sure the Ethernet data contains an IP packet - if not isinstance(eth.data, dpkt.ip.IP): - print('Non IP Packet type not supported %s\n' % eth.data.__class__.__name__) - continue - - # Now grab the data within the Ethernet frame (the IP packet) - ip = eth.data - - # Check for TCP in the transport layer - if isinstance(ip.data, dpkt.tcp.TCP): - - # Set the TCP data - tcp = ip.data - - # Now see if we can parse the contents as a HTTP request - try: - request = dpkt.http.Request(tcp.data) - except (dpkt.dpkt.NeedData, dpkt.dpkt.UnpackError): - continue - - # Pull out fragment information (flags and offset all packed into off field, so use bitmasks) - do_not_fragment = bool(ip.off & dpkt.ip.IP_DF) - more_fragments = bool(ip.off & dpkt.ip.IP_MF) - fragment_offset = ip.off & dpkt.ip.IP_OFFMASK - - # Print out the info - print('Timestamp: ', str(datetime.datetime.utcfromtimestamp(timestamp))) - print('Ethernet Frame: ', mac_addr(eth.src), mac_addr(eth.dst), eth.type) - print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d)' % - (inet_to_str(ip.src), inet_to_str(ip.dst), ip.len, ip.ttl, do_not_fragment, more_fragments, - fragment_offset)) - print('HTTP request: %s\n' % repr(request)) - - # Check for Header spanning acrossed TCP segments - if not tcp.data.endswith(b'\r\n'): - print('\nHEADER TRUNCATED! Reassemble TCP segments!\n') - - -def test_http_pcap(): - """Open up a test pcap file and print out the packets""" - with open('test_pcap/http.pcap', 'rb') as f: - pcap = dpkt.pcap.Reader(f) - print_http_requests(pcap) - +import tensorflow_io.pcap as pcap_io # pylint: disable=wrong-import-position def test_pcap_input(): """test_pcap_input """ pcap_filename = os.path.join( os.path.dirname(os.path.abspath(__file__)), "test_pcap", "http.pcap") - - with open(pcap_filename, 'rb') as f: - pcapr = dpkt.pcap.Reader(f) - # read packets from test pcap file in a memory array - pcap = [p for p in pcapr] - - pcap = pcap*2 - file_url = "file://" + pcap_filename url_filenames = [file_url, file_url] - dataset = text_io.PcapDataset(url_filenames, batch=2) + dataset = pcap_io.PcapDataset(url_filenames, batch=1) iterator = dataset.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() with tf.compat.v1.Session() as sess: sess.run(init_op) - for i in range(0, len(pcap), 2) - v = sess.run(get_next) - for j in range(2) - orig_timestamp, orig_buf = pcap[i+j] - sample_timestamp, sample_buf = v[j] - assert orig_timestamp == sample_timestamp - assert orig_buf == sample_buf - with pytest.raises(errors.OutOfRangeError): - sess.run(get_next) + while True: + try: + v = sess.run(get_next) + sample_timestamp, sample_buf = v[0] + assert sample_timestamp != None + assert sample_buf == != None + except tf.errors.OutOfRangeError: + break if __name__ == "__main__": From 53770eb7fc0c0874b9951433c54d7695caf128ab Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Sun, 16 Jun 2019 17:19:24 -0500 Subject: [PATCH 03/12] fixed syntax errors --- tensorflow_io/pcap/__init__.py | 5 ++--- tensorflow_io/pcap/kernels/pcap_input.cc | 16 ++++++++-------- tensorflow_io/pcap/python/ops/pcap_ops.py | 9 +++------ tests/test_pcap.py | 21 ++++++++++++--------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/tensorflow_io/pcap/__init__.py b/tensorflow_io/pcap/__init__.py index 6d7cbcb2b..d8d0aeafd 100644 --- a/tensorflow_io/pcap/__init__.py +++ b/tensorflow_io/pcap/__init__.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""PcapInput/PcapOutput +"""PcapInput -@@PcapOutputSequence @@PcapDataset """ @@ -23,9 +22,9 @@ from __future__ import print_function from tensorflow.python.util.all_util import remove_undocumented +from tensorflow_io.pcap.python.ops.pcap_ops import PcapDataset _allowed_symbols = [ - "PcapOutputSequence", "PcapDataset", ] diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index 127802819..244d164db 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -37,8 +37,8 @@ class PcapInputStream : public io::BufferedInputStream { buffer.clear(); // read packet header - TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct pcapPacketHeader), &buffer)); - struct pcapPacketHeader *header = (struct pcapPacketHeader *)buffer.data(); + TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct PacketHeader), &buffer)); + struct PacketHeader *header = (struct PacketHeader *)buffer.data(); if (reverse_header_byte_order) { // switch byte order to get accurate representation of field values @@ -64,14 +64,14 @@ class PcapInputStream : public io::BufferedInputStream { TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct PcapFileHeader), &buffer)); struct PcapFileHeader *header = (struct PcapFileHeader *)buffer.data(); - if(!ValidateMagicNumber(header->magic_number)) + if(!ValidateMagicNumber(header->magic_number)) { return errors::InvalidArgument("PCAP file must starts with a standard magic number."); } if (reverse_header_byte_order) { // switch byte order to get accurate representation of packet slices // snaplen will be needed to reconstruct sliced payloads spread across multiple pcap packets - EndianSwap(header->snaplen) + EndianSwap(header->snaplen); } return Status::OK(); } @@ -119,7 +119,7 @@ class PcapInputStream : public io::BufferedInputStream { (x<<8); } - inline void EndianSwap(int32_t& x) + inline void EndianSwap(uint32_t& x) { x = (x>>24) | ((x<<8) & 0x00FF0000) | @@ -132,7 +132,7 @@ class PcapInputStream : public io::BufferedInputStream { /** Check for the magic numbers of the two most typical pcap formats used in practice. */ - bool ValidateMagicNumber(uint32_t magic) { + bool ValidateMagicNumber(uint32_t magic_number) { if (magic_number == MAGIC_NUMBER) { return true; } else if (magic_number == MAGIC_NUMBER_REVERSED) { @@ -170,9 +170,9 @@ class PcapInput: public FileInput { TF_RETURN_IF_ERROR(state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count)); if (record_count > 0) { Tensor timestamp_tensor = (*out_tensors)[0]; - timestamp_tensor->flat()(record_read) = packet_timestamp; + timestamp_tensor.flat()(*record_read) = packet_timestamp; Tensor data_tensor = (*out_tensors)[1]; - timestamp_tensor.flat()(record_read) = std::move(packet_data_buffer) + timestamp_tensor.flat()(*record_read) = std::move(packet_data_buffer); (*record_read) += record_count; } else { // no more records available to read diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index 3127e0a13..ce171f5dd 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -20,11 +20,9 @@ import tensorflow as tf from tensorflow import dtypes from tensorflow.compat.v1 import data -from tensorflow.python.framework import ops - +from tensorflow_io import _load_library pcap_ops = _load_library('_pcap_ops.so') -# @tf_export("PcapDataset") class PcapDataset(data.Dataset): """A pcap Dataset. Pcap is a popular file format for capturing network packets. """ @@ -35,7 +33,7 @@ def __init__(self, filenames, batch=None): Args: filenames: A `tf.string` tensor containing one or more filenames. """ - self._data_input = audio_ops.wav_input(filenames) + self._data_input = pcap_ops.pcap_input(filenames) self._batch = 0 if batch is None else batch super(PcapDataset, self).__init__() @@ -54,7 +52,7 @@ def _as_variant_tensor(self): @property def output_classes(self): # we output a tensor for packet timestamp and one for packet data - return tuple(tf.Tensor, tf.Tensor) + return (tf.Tensor, tf.Tensor) @property def output_shapes(self): @@ -67,4 +65,3 @@ def output_shapes(self): @property def output_types(self): return tuple([dtypes.float64, dtypes.string]) - diff --git a/tests/test_pcap.py b/tests/test_pcap.py index d2d23fec8..7ba7455ba 100644 --- a/tests/test_pcap.py +++ b/tests/test_pcap.py @@ -29,24 +29,27 @@ def test_pcap_input(): """test_pcap_input """ + print("Testing PcapDataset") pcap_filename = os.path.join( os.path.dirname(os.path.abspath(__file__)), "test_pcap", "http.pcap") file_url = "file://" + pcap_filename - url_filenames = [file_url, file_url] + url_filenames = [file_url] dataset = pcap_io.PcapDataset(url_filenames, batch=1) iterator = dataset.make_initializable_iterator() init_op = iterator.initializer get_next = iterator.get_next() with tf.compat.v1.Session() as sess: sess.run(init_op) - while True: - try: - v = sess.run(get_next) - sample_timestamp, sample_buf = v[0] - assert sample_timestamp != None - assert sample_buf == != None - except tf.errors.OutOfRangeError: - break + try: + print("Reading pcap data via tf session") + v = sess.run(get_next) + # sample_timestamp, sample_buf = v[0] + # assert sample_timestamp != None + # assert sample_buf != None + except tf.errors.OutOfRangeError as err: + print("Exception caught during test:") + print(err) + # break if __name__ == "__main__": From 389f941aef20035b3c2026a026c30c9430bb9d6f Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Sun, 16 Jun 2019 17:51:16 -0500 Subject: [PATCH 04/12] added debug info --- tensorflow_io/pcap/kernels/pcap_input.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index 244d164db..e3836f513 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -149,11 +149,13 @@ class PcapInputStream : public io::BufferedInputStream { class PcapInput: public FileInput { public: Status ReadRecord(io::InputStreamInterface* s, IteratorContext* ctx, std::unique_ptr& state, int64 record_to_read, int64* record_read, std::vector* out_tensors) const override { + std::cout << "pcap_input.cc: Entering PcapInput::ReadRecord\n"; if (state.get() == nullptr) { state.reset(new PcapInputStream(s)); TF_RETURN_IF_ERROR(state.get()->ReadHeader()); } - + std::cout << "PCAP header read from file.\n"; + std::cout << "Will read " << record_to_read << " records.\n"; // Let's allocate enough space for Tensor, if more than read, replace. // The output tensor has two columns (packet_timestamp,packet_data). // Hence the shape of the output tensor is (record_to_read,2) unless there are less than record_to_read packets left in the file @@ -161,22 +163,31 @@ class PcapInput: public FileInput { out_tensors->emplace_back(std::move(tensor_packet_ts)); // add timestamp column to the output tensor Tensor tensor_packet_data(ctx->allocator({}), DT_STRING, {record_to_read}); // Tensor column for packet data out_tensors->emplace_back(std::move(tensor_packet_data)); // add data column to the output tensor + std::cout << "Allocated tensor placeholders for packet timestamp and data.\n"; // read packets from the file up to record_to_read or end of file while ((*record_read) < record_to_read) { + std::cout << "Records read so far:" << (*record_read) << "\n"; int64 record_count = 0; double packet_timestamp; string packet_data_buffer; + std::cout << "Reading next record.\n"; TF_RETURN_IF_ERROR(state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count)); + std::cout << "packet record_count: " << record_count << "\n"; + std::cout << "packet timestamp: " << packet_timestamp << "\n"; + std::cout << "packet data length: " << packet_data_buffer.length() << "\n"; if (record_count > 0) { + std::cout << "Placing record in output tensors\n"; Tensor timestamp_tensor = (*out_tensors)[0]; - timestamp_tensor.flat()(*record_read) = packet_timestamp; + timestamp_tensor.flat()(*record_read-1) = packet_timestamp; Tensor data_tensor = (*out_tensors)[1]; - timestamp_tensor.flat()(*record_read) = std::move(packet_data_buffer); + timestamp_tensor.flat()(*record_read-1) = std::move(packet_data_buffer); (*record_read) += record_count; + std::cout << "Record placed in output tensors\n"; } else { // no more records available to read // record_count == 0 + std::cout << "No more records left in the file.\n"; break; } } @@ -208,5 +219,6 @@ REGISTER_KERNEL_BUILDER(Name("PcapInput").Device(DEVICE_CPU), FileInputOp); REGISTER_KERNEL_BUILDER(Name("PcapDataset").Device(DEVICE_CPU), FileInputDatasetOp); + } // namespace data } // namespace tensorflow From 6cfacd44fe656a55d9afd2e852ae621b19ada48b Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Sun, 16 Jun 2019 18:10:42 -0500 Subject: [PATCH 05/12] added more debug info. Still working on crash in pcap_input.cc --- tensorflow_io/pcap/kernels/pcap_input.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index e3836f513..69ffb21f3 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -177,11 +177,15 @@ class PcapInput: public FileInput { std::cout << "packet timestamp: " << packet_timestamp << "\n"; std::cout << "packet data length: " << packet_data_buffer.length() << "\n"; if (record_count > 0) { - std::cout << "Placing record in output tensors\n"; + std::cout << "Placing packet in output tensors\n"; Tensor timestamp_tensor = (*out_tensors)[0]; - timestamp_tensor.flat()(*record_read-1) = packet_timestamp; + timestamp_tensor.flat()(*record_read) = packet_timestamp; + std::cout << "Packet timestamp placed in output tensor.\n"; Tensor data_tensor = (*out_tensors)[1]; - timestamp_tensor.flat()(*record_read-1) = std::move(packet_data_buffer); + std::cout << "data tensor placeholder fetched.\n"; + timestamp_tensor.flat()(*record_read) = std::move(packet_data_buffer); + std::cout << "Packet data placed in output tensor.\n"; + return Status::OK(); // test line, DELETE !!! (*record_read) += record_count; std::cout << "Record placed in output tensors\n"; } else { From 057473938918f40999818c7d0549c86e202397ed Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 00:08:49 -0500 Subject: [PATCH 06/12] working version with debug data --- tensorflow_io/pcap/kernels/pcap_input.cc | 13 ++++++++++--- tests/test_pcap.py | 23 ++++++++++++++--------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index 69ffb21f3..f89ce87fe 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -40,13 +40,21 @@ class PcapInputStream : public io::BufferedInputStream { TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct PacketHeader), &buffer)); struct PacketHeader *header = (struct PacketHeader *)buffer.data(); + std::cout << "packet caplen before endianswap." << header->caplen << "\n"; + std::cout << "packet orig_len before endianswap." << header->orig_len << "\n"; + if (reverse_header_byte_order) { // switch byte order to get accurate representation of field values EndianSwap(header->ts_sec); EndianSwap(header->ts_msec); + EndianSwap(header->caplen); + EndianSwap(header->orig_len); } + std::cout << "packet caplen after endianswap." << header->caplen << "\n"; + std::cout << "packet orig_len after endianswap." << header->orig_len << "\n"; + // Combine date and time in seconds plus milliseconds offset into one composite value timestamp = header->ts_sec + (header->ts_msec / 1e6); @@ -103,7 +111,7 @@ class PcapInputStream : public io::BufferedInputStream { ts_sec: the date and time when this packet was captured. This value is in seconds since January 1, 1970 00:00:00 GMT; this is also known as a UN*X time_t. You can use the ANSI C time() function from time.h to get this value, but you might use a more optimized way to get this timestamp value. If this timestamp isn't based on GMT (UTC), use thiszone from the global header for adjustments. ts_msec: in regular pcap files, the microseconds when this packet was captured, as an offset to ts_sec. In nanosecond-resolution files, this is, instead, the nanoseconds when the packet was captured, as an offset to ts_sec /!\ Beware: this value shouldn't reach 1 second (in regular pcap files 1 000 000; in nanosecond-resolution files, 1 000 000 000); in this case ts_sec must be increased instead! caplen: the number of bytes of packet data actually captured and saved in the file. This value should never become larger than orig_len or the snaplen value of the global header. - orig_len: the length of the packet as it appeared on the network when it was captured. If incl_len and orig_len differ, the actually saved packet size was limited by snaplen. + orig_len: the length of the packet as it appeared on the network when it was captured. If caplen and orig_len differ, the actually saved packet size was limited by snaplen. */ struct PacketHeader { @@ -183,9 +191,8 @@ class PcapInput: public FileInput { std::cout << "Packet timestamp placed in output tensor.\n"; Tensor data_tensor = (*out_tensors)[1]; std::cout << "data tensor placeholder fetched.\n"; - timestamp_tensor.flat()(*record_read) = std::move(packet_data_buffer); + data_tensor.flat()(*record_read) = std::move(packet_data_buffer); std::cout << "Packet data placed in output tensor.\n"; - return Status::OK(); // test line, DELETE !!! (*record_read) += record_count; std::cout << "Record placed in output tensors\n"; } else { diff --git a/tests/test_pcap.py b/tests/test_pcap.py index 7ba7455ba..e35508c47 100644 --- a/tests/test_pcap.py +++ b/tests/test_pcap.py @@ -40,16 +40,21 @@ def test_pcap_input(): get_next = iterator.get_next() with tf.compat.v1.Session() as sess: sess.run(init_op) - try: - print("Reading pcap data via tf session") + v = sess.run(get_next) + first_packet_timestamp = v[0][0] + first_packet_data = v[1][0] + assert first_packet_timestamp == 1084443427.311224 # we know this is the correct value in the test pcap file + assert len(first_packet_data) == 62 # we know this is the correct packet data buffer length in the test pcap file + + + packets_total = 43 # we know this is the correct number of packets in the test pcap file + for packets_read in range(1, packets_total): v = sess.run(get_next) - # sample_timestamp, sample_buf = v[0] - # assert sample_timestamp != None - # assert sample_buf != None - except tf.errors.OutOfRangeError as err: - print("Exception caught during test:") - print(err) - # break + next_packet_timestamp = v[0][0] + next_packet_data = v[1][0] + assert next_packet_timestamp + assert next_packet_data + print(packets_read) if __name__ == "__main__": From 7ece1a0c5a05e87a367a1a06dd11eb7ee963dfa0 Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 00:14:18 -0500 Subject: [PATCH 07/12] working version for pcap dataset --- tensorflow_io/pcap/kernels/pcap_input.cc | 22 ---------------------- tests/test_pcap.py | 1 - 2 files changed, 23 deletions(-) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index f89ce87fe..a02265f53 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -40,21 +40,14 @@ class PcapInputStream : public io::BufferedInputStream { TF_RETURN_IF_ERROR(ReadNBytes(sizeof(struct PacketHeader), &buffer)); struct PacketHeader *header = (struct PacketHeader *)buffer.data(); - std::cout << "packet caplen before endianswap." << header->caplen << "\n"; - std::cout << "packet orig_len before endianswap." << header->orig_len << "\n"; - if (reverse_header_byte_order) { // switch byte order to get accurate representation of field values EndianSwap(header->ts_sec); EndianSwap(header->ts_msec); - EndianSwap(header->caplen); EndianSwap(header->orig_len); } - std::cout << "packet caplen after endianswap." << header->caplen << "\n"; - std::cout << "packet orig_len after endianswap." << header->orig_len << "\n"; - // Combine date and time in seconds plus milliseconds offset into one composite value timestamp = header->ts_sec + (header->ts_msec / 1e6); @@ -157,13 +150,10 @@ class PcapInputStream : public io::BufferedInputStream { class PcapInput: public FileInput { public: Status ReadRecord(io::InputStreamInterface* s, IteratorContext* ctx, std::unique_ptr& state, int64 record_to_read, int64* record_read, std::vector* out_tensors) const override { - std::cout << "pcap_input.cc: Entering PcapInput::ReadRecord\n"; if (state.get() == nullptr) { state.reset(new PcapInputStream(s)); TF_RETURN_IF_ERROR(state.get()->ReadHeader()); } - std::cout << "PCAP header read from file.\n"; - std::cout << "Will read " << record_to_read << " records.\n"; // Let's allocate enough space for Tensor, if more than read, replace. // The output tensor has two columns (packet_timestamp,packet_data). // Hence the shape of the output tensor is (record_to_read,2) unless there are less than record_to_read packets left in the file @@ -171,34 +161,22 @@ class PcapInput: public FileInput { out_tensors->emplace_back(std::move(tensor_packet_ts)); // add timestamp column to the output tensor Tensor tensor_packet_data(ctx->allocator({}), DT_STRING, {record_to_read}); // Tensor column for packet data out_tensors->emplace_back(std::move(tensor_packet_data)); // add data column to the output tensor - std::cout << "Allocated tensor placeholders for packet timestamp and data.\n"; // read packets from the file up to record_to_read or end of file while ((*record_read) < record_to_read) { - std::cout << "Records read so far:" << (*record_read) << "\n"; int64 record_count = 0; double packet_timestamp; string packet_data_buffer; - std::cout << "Reading next record.\n"; TF_RETURN_IF_ERROR(state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count)); - std::cout << "packet record_count: " << record_count << "\n"; - std::cout << "packet timestamp: " << packet_timestamp << "\n"; - std::cout << "packet data length: " << packet_data_buffer.length() << "\n"; if (record_count > 0) { - std::cout << "Placing packet in output tensors\n"; Tensor timestamp_tensor = (*out_tensors)[0]; timestamp_tensor.flat()(*record_read) = packet_timestamp; - std::cout << "Packet timestamp placed in output tensor.\n"; Tensor data_tensor = (*out_tensors)[1]; - std::cout << "data tensor placeholder fetched.\n"; data_tensor.flat()(*record_read) = std::move(packet_data_buffer); - std::cout << "Packet data placed in output tensor.\n"; (*record_read) += record_count; - std::cout << "Record placed in output tensors\n"; } else { // no more records available to read // record_count == 0 - std::cout << "No more records left in the file.\n"; break; } } diff --git a/tests/test_pcap.py b/tests/test_pcap.py index e35508c47..8728732ac 100644 --- a/tests/test_pcap.py +++ b/tests/test_pcap.py @@ -54,7 +54,6 @@ def test_pcap_input(): next_packet_data = v[1][0] assert next_packet_timestamp assert next_packet_data - print(packets_read) if __name__ == "__main__": From afc77f1cceb1bcaf9a9974dc0f43b99acbc5e84a Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 14:32:42 -0500 Subject: [PATCH 08/12] updated git email address --- tensorflow_io/pcap/python/ops/pcap_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index ce171f5dd..043e5f471 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -24,7 +24,7 @@ pcap_ops = _load_library('_pcap_ops.so') class PcapDataset(data.Dataset): - """A pcap Dataset. Pcap is a popular file format for capturing network packets. + """ A pcap Dataset. Pcap is a popular file format for capturing network packets. """ def __init__(self, filenames, batch=None): From 2782f6fa9f8fe6c70cf325696d34a2b64f2137fd Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 14:40:10 -0500 Subject: [PATCH 09/12] email address --- tensorflow_io/pcap/python/ops/pcap_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index 043e5f471..33bbf17d7 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""PcapInput/PcapOutput.""" +""" PcapInput/PcapOutput.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function From b39d50c46eb99cf05155c078a9e6e2b14983862d Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 14:42:11 -0500 Subject: [PATCH 10/12] email --- tensorflow_io/pcap/python/ops/pcap_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index 33bbf17d7..043e5f471 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -""" PcapInput/PcapOutput.""" +"""PcapInput/PcapOutput.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function From a94b2a8e2a8016eb86439d7129fd1e9fc739e350 Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Mon, 17 Jun 2019 22:13:45 -0500 Subject: [PATCH 11/12] applied Yong's recommended changes for TF v2 python lint --- tensorflow_io/pcap/kernels/pcap_input.cc | 14 ++---- tensorflow_io/pcap/python/ops/pcap_ops.py | 55 ++++++----------------- tests/test_pcap.py | 52 +++++++++------------ 3 files changed, 39 insertions(+), 82 deletions(-) diff --git a/tensorflow_io/pcap/kernels/pcap_input.cc b/tensorflow_io/pcap/kernels/pcap_input.cc index a02265f53..ba877d416 100644 --- a/tensorflow_io/pcap/kernels/pcap_input.cc +++ b/tensorflow_io/pcap/kernels/pcap_input.cc @@ -167,7 +167,10 @@ class PcapInput: public FileInput { int64 record_count = 0; double packet_timestamp; string packet_data_buffer; - TF_RETURN_IF_ERROR(state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count)); + Status status = state.get()->ReadRecord(packet_timestamp, &packet_data_buffer, record_count); + if (!(status.ok() || errors::IsOutOfRange(status))) { + return status; + } if (record_count > 0) { Tensor timestamp_tensor = (*out_tensors)[0]; timestamp_tensor.flat()(*record_read) = packet_timestamp; @@ -180,15 +183,6 @@ class PcapInput: public FileInput { break; } } - if (*record_read < record_to_read) { - if (*record_read == 0) { - out_tensors->clear(); - } - for (size_t i = 0; i < out_tensors->size(); i++) { - Tensor tensor = (*out_tensors)[i].Slice(0, *record_read); - (*out_tensors)[i] = std::move(tensor); - } - } return Status::OK(); } Status FromStream(io::InputStreamInterface* s) override { diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index 043e5f471..fb28fa98e 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -12,19 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""PcapInput/PcapOutput.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - +"""PcapDataset""" import tensorflow as tf -from tensorflow import dtypes -from tensorflow.compat.v1 import data +from tensorflow_io.core.python.ops import data_ops as data_ops from tensorflow_io import _load_library pcap_ops = _load_library('_pcap_ops.so') -class PcapDataset(data.Dataset): - """ A pcap Dataset. Pcap is a popular file format for capturing network packets. + +class PcapDataset(data_ops.Dataset): + """A pcap Dataset. Pcap is a popular file format for capturing network packets. """ def __init__(self, filenames, batch=None): @@ -33,35 +29,12 @@ def __init__(self, filenames, batch=None): Args: filenames: A `tf.string` tensor containing one or more filenames. """ - self._data_input = pcap_ops.pcap_input(filenames) - self._batch = 0 if batch is None else batch - super(PcapDataset, self).__init__() - - - - def _inputs(self): - return [] - - def _as_variant_tensor(self): - return pcap_ops.pcap_dataset( - self._data_input, - self._batch, - output_types=self.output_types, - output_shapes=self.output_shapes) - - @property - def output_classes(self): - # we output a tensor for packet timestamp and one for packet data - return (tf.Tensor, tf.Tensor) - - @property - def output_shapes(self): - return tuple( - [tf.TensorShape([]) for _ in self._columns] - ) if self._batch is None else tuple( - [tf.TensorShape([None]), tf.TensorShape([None])] - ) - - @property - def output_types(self): - return tuple([dtypes.float64, dtypes.string]) + batch = 0 if batch is None else batch + dtypes = [tf.float64, tf.string] + shapes = [ + tf.TensorShape([]), tf.TensorShape([])] if batch == 0 else [ + tf.TensorShape([None]), tf.TensorShape([None])] + super(PcapDataset, self).__init__( + pcap_ops.pcap_dataset, + pcap_ops.pcap_input(filenames), + batch, dtypes, shapes) \ No newline at end of file diff --git a/tests/test_pcap.py b/tests/test_pcap.py index 8728732ac..7904e75d9 100644 --- a/tests/test_pcap.py +++ b/tests/test_pcap.py @@ -21,40 +21,30 @@ import os import tensorflow as tf - -tf.compat.v1.disable_eager_execution() - import tensorflow_io.pcap as pcap_io # pylint: disable=wrong-import-position -def test_pcap_input(): - """test_pcap_input - """ - print("Testing PcapDataset") - pcap_filename = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "test_pcap", "http.pcap") - file_url = "file://" + pcap_filename - url_filenames = [file_url] - dataset = pcap_io.PcapDataset(url_filenames, batch=1) - iterator = dataset.make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - with tf.compat.v1.Session() as sess: - sess.run(init_op) - v = sess.run(get_next) - first_packet_timestamp = v[0][0] - first_packet_data = v[1][0] - assert first_packet_timestamp == 1084443427.311224 # we know this is the correct value in the test pcap file - assert len(first_packet_data) == 62 # we know this is the correct packet data buffer length in the test pcap file +if not (hasattr(tf, "version") and tf.version.VERSION.startswith("2.")): + tf.compat.v1.enable_eager_execution() - packets_total = 43 # we know this is the correct number of packets in the test pcap file - for packets_read in range(1, packets_total): - v = sess.run(get_next) - next_packet_timestamp = v[0][0] - next_packet_data = v[1][0] - assert next_packet_timestamp - assert next_packet_data - +def test_pcap_input(): + """test_pcap_input + """ + print("Testing PcapDataset") + pcap_filename = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "test_pcap", "http.pcap") + file_url = "file://" + pcap_filename + url_filenames = [file_url] + dataset = pcap_io.PcapDataset(url_filenames, batch=1) + + packets_total = 0 + for v in dataset: + (packet_timestamp, packet_data) = v + if packets_total == 0: + assert packet_timestamp.numpy()[0] == 1084443427.311224 # we know this is the correct value in the test pcap file + assert len(packet_data.numpy()[0]) == 62 # we know this is the correct packet data buffer length in the test pcap file + packets_total += 1 + assert packets_total == 43 # we know this is the correct number of packets in the test pcap file if __name__ == "__main__": - test.main() + test.main() From 3187d5c2468addbbc398ba70cf40f6e6bf278273 Mon Sep 17 00:00:00 2001 From: Ivelin Ivanov Date: Tue, 18 Jun 2019 12:55:54 -0500 Subject: [PATCH 12/12] renamed test_pcap.py to test_pcap_eager.py lint pcap/pcap_ops.py --- tensorflow_io/pcap/python/ops/pcap_ops.py | 2 +- tests/{test_pcap.py => test_pcap_eager.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/{test_pcap.py => test_pcap_eager.py} (100%) diff --git a/tensorflow_io/pcap/python/ops/pcap_ops.py b/tensorflow_io/pcap/python/ops/pcap_ops.py index fb28fa98e..5fa527d5b 100644 --- a/tensorflow_io/pcap/python/ops/pcap_ops.py +++ b/tensorflow_io/pcap/python/ops/pcap_ops.py @@ -37,4 +37,4 @@ def __init__(self, filenames, batch=None): super(PcapDataset, self).__init__( pcap_ops.pcap_dataset, pcap_ops.pcap_input(filenames), - batch, dtypes, shapes) \ No newline at end of file + batch, dtypes, shapes) diff --git a/tests/test_pcap.py b/tests/test_pcap_eager.py similarity index 100% rename from tests/test_pcap.py rename to tests/test_pcap_eager.py