unilink  0.4.3
A simple C++ library for unified async communication
packet_framer.cc
Go to the documentation of this file.
1 /*
2  * Copyright 2025 Jinwoo Sung
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <algorithm>
20 #include <iterator>
21 #include <stdexcept>
22 
23 namespace unilink {
24 namespace framer {
25 
26 PacketFramer::PacketFramer(const std::vector<uint8_t>& start_pattern, const std::vector<uint8_t>& end_pattern,
27  size_t max_length)
28  : start_pattern_(start_pattern), end_pattern_(end_pattern), max_length_(max_length), state_(State::Sync) {
29  if (start_pattern_.empty() && end_pattern_.empty()) {
30  throw std::invalid_argument("PacketFramer: start_pattern and end_pattern cannot both be empty.");
31  }
32 }
33 
35  if (data.empty()) return;
36 
37  // Fast Path: Zero-copy processing if buffer is empty
38  if (buffer_.empty()) {
39  size_t processed_count = 0;
40  while (processed_count < data.size()) {
41  // Find start pattern
42  auto search_start = data.begin() + static_cast<std::ptrdiff_t>(processed_count);
43  auto it_start = std::search(search_start, data.end(), start_pattern_.begin(), start_pattern_.end());
44 
45  if (it_start == data.end()) {
46  // Start pattern not found.
47  // Keep partial match at the end if applicable.
48  size_t remaining = data.size() - processed_count;
49  size_t keep_len = (start_pattern_.size() > 1) ? (start_pattern_.size() - 1) : 0;
50  if (remaining > keep_len) {
51  processed_count = data.size() - keep_len;
52  }
53  break;
54  }
55 
56  // Found start pattern
57  size_t start_idx = static_cast<size_t>(std::distance(data.begin(), it_start));
58  size_t search_end_start_idx = start_idx + start_pattern_.size();
59 
60  if (end_pattern_.empty()) {
61  // Assume minimal packet is start pattern only
62  size_t packet_len = start_pattern_.size();
63  if (on_message_) {
64  on_message_(data.subspan(start_idx, packet_len));
65  }
66  processed_count = start_idx + packet_len;
67  continue;
68  }
69 
70  // Find end pattern
71  auto search_end_start = data.begin() + static_cast<std::ptrdiff_t>(search_end_start_idx);
72  auto it_end = std::search(search_end_start, data.end(), end_pattern_.begin(), end_pattern_.end());
73 
74  if (it_end == data.end()) {
75  // Found start pattern but not end pattern.
76  // Buffer everything starting from start_idx.
77  processed_count = start_idx;
78  // The remaining data starts with start_pattern, so we will transition to Collect state
79  // after appending to buffer.
80  break;
81  }
82 
83  // Found end pattern
84  size_t packet_len = static_cast<size_t>(std::distance(data.begin(), it_end)) + end_pattern_.size() - start_idx;
85 
86  if (packet_len <= max_length_) {
87  if (on_message_) {
88  on_message_(data.subspan(start_idx, packet_len));
89  }
90  }
91  // If > max_length, discard by advancing processed_count past it
92 
93  processed_count = start_idx + packet_len;
94  }
95 
96  if (processed_count < data.size()) {
97  buffer_.insert(buffer_.end(), data.begin() + static_cast<std::ptrdiff_t>(processed_count), data.end());
98 
99  // Update state if we buffered a partial packet starting with start_pattern
100  if (state_ == State::Sync && !buffer_.empty()) {
101  if (buffer_.size() >= start_pattern_.size()) {
102  if (std::equal(start_pattern_.begin(), start_pattern_.end(), buffer_.begin())) {
103  state_ = State::Collect;
104  scanned_idx_ = start_pattern_.size();
105  }
106  }
107  }
108  }
109  return;
110  }
111 
112  buffer_.insert(buffer_.end(), data.begin(), data.end());
113 
114  while (true) {
115  if (state_ == State::Sync) {
116  if (start_pattern_.empty()) {
117  state_ = State::Collect;
118  continue;
119  }
120 
121  auto it = std::search(buffer_.begin(), buffer_.end(), start_pattern_.begin(), start_pattern_.end());
122  if (it != buffer_.end()) {
123  // Found start pattern.
124  // Discard everything before start pattern.
125  if (it != buffer_.begin()) {
126  buffer_.erase(buffer_.begin(), it);
127  }
128  state_ = State::Collect;
129  // Start scanning for end pattern after the start pattern we just found
130  scanned_idx_ = start_pattern_.size();
131  // Continue to check for end pattern immediately
132  } else {
133  // Start pattern not found.
134  // Keep partial match at the end.
135  if (start_pattern_.size() > 1) {
136  size_t keep_len = start_pattern_.size() - 1;
137  if (buffer_.size() > keep_len) {
138  buffer_.erase(buffer_.begin(), buffer_.end() - static_cast<std::ptrdiff_t>(keep_len));
139  }
140  } else {
141  buffer_.clear();
142  }
143  break; // Need more data
144  }
145  } else if (state_ == State::Collect) {
146  if (end_pattern_.empty()) {
147  // If end pattern is empty, packet ends immediately after start pattern?
148  // Assume minimal packet is start pattern only
149  size_t packet_len = start_pattern_.size();
150  if (on_message_) {
151  on_message_(memory::ConstByteSpan(buffer_.data(), packet_len));
152  }
153  if (buffer_.empty()) return;
154 
155  buffer_.erase(buffer_.begin(), buffer_.begin() + static_cast<std::ptrdiff_t>(packet_len));
156  state_ = State::Sync;
157  continue;
158  }
159 
160  // Search for end pattern *after* start pattern
161  // Optimization: use scanned_idx_ to avoid re-scanning
162  size_t search_offset = std::max(start_pattern_.size(), scanned_idx_);
163 
164  // Back up slightly to catch split end pattern if we are resuming search
165  if (search_offset > start_pattern_.size()) {
166  size_t overlap = (end_pattern_.size() > 1) ? (end_pattern_.size() - 1) : 0;
167  if (search_offset >= overlap) {
168  search_offset -= overlap;
169  } else {
170  search_offset = 0;
171  }
172  }
173 
174  // Safety clamp to ensure we don't search inside start pattern
175  if (search_offset < start_pattern_.size()) {
176  search_offset = start_pattern_.size();
177  }
178 
179  if (buffer_.size() < search_offset) {
180  // Should not happen if Sync worked correctly
181  break;
182  }
183 
184  auto search_start = buffer_.begin() + static_cast<std::ptrdiff_t>(search_offset);
185  auto it = std::search(search_start, buffer_.end(), end_pattern_.begin(), end_pattern_.end());
186 
187  if (it != buffer_.end()) {
188  // Found end pattern.
189  size_t packet_len = static_cast<size_t>(std::distance(buffer_.begin(), it)) + end_pattern_.size();
190 
191  if (packet_len <= max_length_) {
192  if (on_message_) {
193  on_message_(memory::ConstByteSpan(buffer_.data(), packet_len));
194  }
195  if (buffer_.empty()) return;
196 
197  buffer_.erase(buffer_.begin(), buffer_.begin() + static_cast<std::ptrdiff_t>(packet_len));
198  state_ = State::Sync;
199  scanned_idx_ = 0;
200  } else {
201  // Exceeded max length, discard packet
202  buffer_.erase(buffer_.begin(), buffer_.begin() + static_cast<std::ptrdiff_t>(packet_len));
203  state_ = State::Sync;
204  scanned_idx_ = 0;
205  }
206  } else {
207  // End pattern not found.
208  scanned_idx_ = buffer_.size();
209  if (buffer_.size() > max_length_) {
210  // Exceeded limit while collecting. Reset.
211  buffer_.clear();
212  state_ = State::Sync;
213  scanned_idx_ = 0;
214  }
215  break; // Need more data
216  }
217  }
218  }
219 }
220 
221 void PacketFramer::set_on_message(MessageCallback cb) { on_message_ = std::move(cb); }
222 
224  buffer_.clear();
225  state_ = State::Sync;
226  scanned_idx_ = 0;
227 }
228 
229 } // namespace framer
230 } // namespace unilink