ALPSCore reference
archive.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 1998-2018 ALPS Collaboration. See COPYRIGHT.TXT
3  * All rights reserved. Use is subject to license terms. See LICENSE.TXT
4  * For use in publications, see ACKNOWLEDGE.TXT
5  */
6 
7 #include <alps/hdf5/archive.hpp>
11 
12 #include <memory>
13 
14 #include <hdf5.h>
15 
16 #include <sstream>
17 #include <fstream>
18 #include <iostream>
19 #include <typeinfo>
20 
21 #include "common.hpp"
22 #include "archivecontext.hpp"
23 
24 namespace alps {
25  namespace hdf5 {
26 
27  namespace detail {
28  hid_t open_attribute(archive const & ar, hid_t file_id, std::string path);
29  herr_t list_children_visitor(hid_t, char const * n, const H5L_info_t *, void * d);
30  herr_t list_attributes_visitor(hid_t, char const * n, const H5A_info_t *, void * d);
31  }
32 
33  archive::archive() : context_(NULL) {}
34 
35  archive::archive(std::string const & filename, int prop) : context_(NULL) {
36  std::string mode="";
37  if (prop & COMPRESS) mode += "c";
38  if (prop & MEMORY) mode += "m";
39 
40  prop = prop & ~(COMPRESS|MEMORY);
41 
42  if (prop == READ) {
43  mode += "r";
44  } else if (prop == WRITE) {
45  mode += "w";
46  } else {
47  throw wrong_mode("Unsupported mode flags when openinge file '"+filename+"'" + ALPS_STACKTRACE);
48  }
49  open(filename,mode);
50  }
51 
52  archive::archive(std::string const & filename, std::string mode) : context_(NULL) {
53  open(filename, mode);
54  }
55 
57  : current_(arg.current_)
58  , context_(arg.context_)
59  {
60  if (context_ != NULL) {
62  ++ref_cnt_[file_key(context_->filename_, context_->memory_)].second;
63  }
64  }
65 
67  if (context_ != NULL)
68  try {
69  close();
70  } catch (std::exception & ex) {
71  std::cerr << "Error destructing archive of file '" << ex.what() << std::endl;
72  std::abort();
73  }
74  }
75 
76 /* This method does not seem to be ever used
77 ************************************
78 * void archive::abort() {
79 * // Do not use a lock here, else deadlocking is really likely
80 * for (std::map<std::string, std::pair<detail::archivecontext *, std::size_t> >::iterator it = ref_cnt_.begin(); it != ref_cnt_.end(); ++it) {
81 * bool replace = it->second.first->replace_;
82 * std::string filename = it->second.first->filename_;
83 * it->second.first->replace_ = false;
84 * delete it->second.first;
85 * if (replace && boost::filesystem::exists(filename))
86 * boost::filesystem::remove(filename);
87 * }
88 * ref_cnt_.clear();
89 * }
90 *************************************/
91 
92  void archive::close() {
93  if (context_ == NULL)
94  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
96  H5Fflush(context_->file_id_, H5F_SCOPE_GLOBAL);
97  if (!--ref_cnt_[file_key(context_->filename_, context_->memory_)].second) {
98  ref_cnt_.erase(file_key(context_->filename_, context_->memory_));
99  delete context_;
100  }
101  context_ = NULL;
102  }
103 
104  void archive::open(const std::string & filename, const std::string &mode) {
105  if(is_open())
106  throw archive_opened("the archive '"+ filename + "' is already opened" + ALPS_STACKTRACE);
107  if (mode.find_first_not_of("rwacm")!=std::string::npos)
108  throw wrong_mode("Incorrect mode '"+mode+"' opening file '"+filename+"'" + ALPS_STACKTRACE);
109 
110  construct(filename,
111  (mode.find_last_of('w') == std::string::npos ? 0 : WRITE) //@todo FIXME_DEBOOST: "w" is equiv to "a"
112  | (mode.find_last_of('a') == std::string::npos ? 0 : WRITE)
113  | (mode.find_last_of('c') == std::string::npos ? 0 : COMPRESS)
114  | (mode.find_last_of('m') == std::string::npos ? 0 : MEMORY)
115  );
116  }
117 
119  return context_ != NULL;
120  }
121 
122  std::string const & archive::get_filename() const {
123  if (context_ == NULL)
124  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
125  return context_->filename_;
126  }
127 
128  std::string archive::encode_segment(std::string segment) const {
129  char chars[] = {'&', '/'};
130  for (std::size_t i = 0; i < sizeof(chars); ++i)
131  for (std::size_t pos = segment.find_first_of(chars[i]); pos < std::string::npos; pos = segment.find_first_of(chars[i], pos + 1))
132  segment = segment.substr(0, pos) + "&#" + cast<std::string>(static_cast<int>(chars[i])) + ";" + segment.substr(pos + 1);
133  return segment;
134  }
135 
136  std::string archive::decode_segment(std::string segment) const {
137  for (std::size_t pos = segment.find_first_of('&'); pos < std::string::npos; pos = segment.find_first_of('&', pos + 1))
138  segment = segment.substr(0, pos)
139  + static_cast<char>(cast<int>(segment.substr(pos + 2, segment.find_first_of(';', pos) - pos - 2)))
140  + segment.substr(segment.find_first_of(';', pos) + 1);
141  return segment;
142  }
143 
144  std::string archive::get_context() const {
145  return current_;
146  }
147 
148  void archive::set_context(std::string const & context) {
150  current_ = complete_path(context);
151  }
152 
153  std::string archive::complete_path(std::string path) const {
154  if (path.size() > 1 && *path.rbegin() == '/')
155  path = path.substr(0, path.size() - 1);
156  if (path.size() && path[0] == '/')
157  return path;
158  else if (path.size() < 2 || path.substr(0, 2) != "..")
159  return current_ + (current_.size() == 1 || !path.size() ? "" : "/") + path;
160  else {
161  std::string ctx = current_;
162  while (ctx.size() && path.size() && path.substr(0, 2) == "..") {
163  ctx = ctx.substr(0, ctx.find_last_of('/'));
164  path = path.size() == 2 ? "" : path.substr(3);
165  }
166  return ctx + (ctx.size() == 1 || !path.size() ? "" : "/") + path;
167  }
168  }
169 
170  bool archive::is_data(std::string path) const {
171  if (context_ == NULL)
172  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
173  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
174  throw invalid_path("no data path: " + path + ALPS_STACKTRACE);
176  hid_t id = H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT);
177  return id < 0 ? false : detail::check_data(id) != 0;
178  }
179 
180  bool archive::is_attribute(std::string path) const {
181  if (context_ == NULL)
182  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
183  if ((path = complete_path(path)).find_last_of('@') == std::string::npos)
184  return false;
186  return detail::check_error(H5Aexists_by_name(context_->file_id_, path.substr(0, path.find_last_of('@')).c_str(), path.substr(path.find_last_of('@') + 1).c_str(), H5P_DEFAULT));
187  }
188 
189  bool archive::is_group(std::string path) const {
190  if (context_ == NULL)
191  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
192  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
193  return false;
195  hid_t id = H5Gopen2(context_->file_id_, path.c_str(), H5P_DEFAULT);
196  return id < 0 ? false : detail::check_group(id) != 0;
197  }
198 
199  bool archive::is_scalar(std::string path) const {
200  hid_t space_id;
201  if (context_ == NULL)
202  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
204  if ((path = complete_path(path)).find_last_of('@') != std::string::npos && is_attribute(path)) {
205  detail::attribute_type attr_id(detail::open_attribute(*this, context_->file_id_, path));
206  space_id = H5Aget_space(attr_id);
207  } else if (path.find_last_of('@') == std::string::npos && is_data(path)) {
208  detail::data_type data_id(H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
209  space_id = H5Dget_space(data_id);
210  } else
211  #ifdef ALPS_HDF5_READ_GREEDY
212  return false;
213  #else
214  throw path_not_found("error reading path " + path + ALPS_STACKTRACE);
215  #endif
216  H5S_class_t type = H5Sget_simple_extent_type(space_id);
217  detail::check_space(space_id);
218  if (type == H5S_NO_CLASS)
219  throw archive_error("error reading class " + path + ALPS_STACKTRACE);
220  return type == H5S_SCALAR;
221  }
222 
223  bool archive::is_null(std::string path) const {
224  hid_t space_id;
225  if (context_ == NULL)
226  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
228  if ((path = complete_path(path)).find_last_of('@') != std::string::npos) {
229  detail::attribute_type attr_id(detail::open_attribute(*this, context_->file_id_, path));
230  space_id = H5Aget_space(attr_id);
231  } else {
232  detail::data_type data_id(H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
233  space_id = H5Dget_space(data_id);
234  }
235  H5S_class_t type = H5Sget_simple_extent_type(space_id);
236  detail::check_space(space_id);
237  if (type == H5S_NO_CLASS)
238  throw archive_error("error reading class " + path + ALPS_STACKTRACE);
239  return type == H5S_NULL;
240  }
241 
242  bool archive::is_complex(std::string path) const {
243  if (context_ == NULL)
244  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
246  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
247  return is_attribute(path.substr(0, path.find_last_of('@')) + "@__complex__:" + path.substr(path.find_last_of('@') + 1))
248  && is_scalar(path.substr(0, path.find_last_of('@')) + "@__complex__:" + path.substr(path.find_last_of('@') + 1));
249  else if (is_group(path)) {
250  std::vector<std::string> children = list_children(path);
251  for (std::size_t i = 0; i < children.size(); ++i)
252  if (is_complex(path + "/" + children[i]))
253  return true;
254  return false;
255  } else
256  return is_attribute(path + "/@__complex__") && is_scalar(path + "/@__complex__");
257  }
258 
259  std::vector<std::string> archive::list_children(std::string path) const {
260  if (context_ == NULL)
261  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
262  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
263  throw invalid_path("no group path: " + path + ALPS_STACKTRACE);
264  std::vector<std::string> list;
266  if (!is_group(path))
267  throw path_not_found("The group '" + path + "' does not exist." + ALPS_STACKTRACE);
268  detail::group_type group_id(H5Gopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
269  detail::check_error(H5Literate(group_id, H5_INDEX_NAME, H5_ITER_NATIVE, NULL, detail::list_children_visitor, &list));
270  return list;
271  }
272 
273  std::vector<std::string> archive::list_attributes(std::string path) const {
274  if (context_ == NULL)
275  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
276  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
277  throw invalid_path("no group or data path: " + path + ALPS_STACKTRACE);
278  std::vector<std::string> list;
280  if (is_group(path)) {
281  detail::group_type id(H5Gopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
282  detail::check_error(H5Aiterate2(id, H5_INDEX_CRT_ORDER, H5_ITER_NATIVE, NULL, detail::list_attributes_visitor, &list));
283  } else if (is_data(path)) {
284  detail::data_type id(H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
285  detail::check_error(H5Aiterate2(id, H5_INDEX_CRT_ORDER, H5_ITER_NATIVE, NULL, detail::list_attributes_visitor, &list));
286  } else
287  throw path_not_found("The path '" + path + "' does not exist." + ALPS_STACKTRACE);
288  return list;
289  }
290 
291  std::vector<std::size_t> archive::extent(std::string path) const {
292  if (context_ == NULL)
293  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
294  if (is_null(path = complete_path(path)))
295  return std::vector<std::size_t>(1, 0);
296  else if (is_scalar(path))
297  return std::vector<std::size_t>(1, 1);
298  std::vector<hsize_t> buffer(dimensions(path), 0);
299  hid_t space_id;
301  if (path.find_last_of('@') != std::string::npos) {
302  detail::attribute_type attr_id(detail::open_attribute(*this, context_->file_id_, path));
303  space_id = H5Aget_space(attr_id);
304  } else {
305  detail::data_type data_id(H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
306  space_id = H5Dget_space(data_id);
307  }
308  detail::check_error(H5Sget_simple_extent_dims(space_id, &buffer.front(), NULL));
309  detail::check_space(space_id);
310  std::vector<std::size_t> extent(buffer.begin(), buffer.end());
311  return extent;
312  }
313 
314  std::size_t archive::dimensions(std::string path) const {
315  if (context_ == NULL)
316  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
318  if ((path = complete_path(path)).find_last_of('@') != std::string::npos) {
319  detail::attribute_type attr_id(detail::open_attribute(*this, context_->file_id_, path));
320  return detail::check_error(H5Sget_simple_extent_dims(detail::space_type(H5Aget_space(attr_id)), NULL, NULL));
321  } else {
322  detail::data_type data_id(H5Dopen2(context_->file_id_, path.c_str(), H5P_DEFAULT));
323  return detail::check_error(H5Sget_simple_extent_dims(detail::space_type(H5Dget_space(data_id)), NULL, NULL));
324  }
325  }
326 
327  void archive::create_group(std::string path) const {
328  if (context_ == NULL)
329  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
330  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
331  throw invalid_path("no group path: " + path + ALPS_STACKTRACE);
333  if (is_data(path))
334  delete_data(path);
335  if (!is_group(path)) {
336  std::size_t pos;
337  hid_t group_id = -1;
338  for (pos = path.find_last_of('/'); group_id < 0 && pos > 0 && pos < std::string::npos; pos = path.find_last_of('/', pos - 1))
339  group_id = H5Gopen2(context_->file_id_, path.substr(0, pos).c_str(), H5P_DEFAULT);
340  if (group_id < 0) {
341  if ((pos = path.find_first_of('/', 1)) != std::string::npos) {
342  detail::property_type prop_id(H5Pcreate(H5P_GROUP_CREATE));
343  detail::check_error(H5Pset_link_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
344  detail::check_error(H5Pset_attr_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
345  detail::check_group(H5Gcreate2(context_->file_id_, path.substr(0, pos).c_str(), H5P_DEFAULT, prop_id, H5P_DEFAULT));
346  }
347  } else {
348  pos = path.find_first_of('/', pos + 1);
349  detail::check_group(group_id);
350  }
351  while (pos != std::string::npos && (pos = path.find_first_of('/', pos + 1)) != std::string::npos && pos > 0) {
352  detail::property_type prop_id(H5Pcreate(H5P_GROUP_CREATE));
353  detail::check_error(H5Pset_link_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
354  detail::check_error(H5Pset_attr_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
355  detail::check_group(H5Gcreate2(context_->file_id_, path.substr(0, pos).c_str(), H5P_DEFAULT, prop_id, H5P_DEFAULT));
356  }
357  detail::property_type prop_id(H5Pcreate(H5P_GROUP_CREATE));
358  detail::check_error(H5Pset_link_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
359  detail::check_error(H5Pset_attr_creation_order(prop_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)));
360  detail::check_group(H5Gcreate2(context_->file_id_, path.c_str(), H5P_DEFAULT, prop_id, H5P_DEFAULT));
361  }
362  }
363 
364  void archive::delete_data(std::string path) const {
365  if (context_ == NULL)
366  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
367  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
368  throw invalid_path("no data path: " + path + ALPS_STACKTRACE);
370  if (is_data(path))
371  detail::check_error(H5Ldelete(context_->file_id_, path.c_str(), H5P_DEFAULT));
372  else if (is_group(path))
373  throw invalid_path("the path contains a group: " + path + ALPS_STACKTRACE);
374  }
375 
376  void archive::delete_group(std::string path) const {
377  if (context_ == NULL)
378  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
379  if ((path = complete_path(path)).find_last_of('@') != std::string::npos)
380  throw invalid_path("no group path: " + path + ALPS_STACKTRACE);
382  if (is_group(path))
383  detail::check_error(H5Ldelete(context_->file_id_, path.c_str(), H5P_DEFAULT));
384  else if (is_data(path))
385  throw invalid_path("the path contains a dataset: " + path + ALPS_STACKTRACE);
386  }
387 
388  void archive::delete_attribute(std::string path) const {
389  if (context_ == NULL)
390  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
391  if ((path = complete_path(path)).find_last_of('@') == std::string::npos)
392  throw invalid_path("no attribute path: " + path + ALPS_STACKTRACE);
393  // TODO: implement
394  throw std::logic_error("Not implemented!" + ALPS_STACKTRACE);
395  }
396 
397  void archive::set_complex(std::string path) {
398  if (context_ == NULL)
399  throw archive_closed("the archive is closed" + ALPS_STACKTRACE);
401  if (path.find_last_of('@') != std::string::npos)
402  write(path.substr(0, path.find_last_of('@')) + "@__complex__:" + path.substr(path.find_last_of('@') + 1), true);
403  else {
404  if (is_group(path)) {
405  std::vector<std::string> children = list_children(path);
406  for (std::vector<std::string>::const_iterator it = children.begin(); it != children.end(); ++it)
407  set_complex(path + "/" + *it);
408  } else
409  write(path + "/@__complex__", true);
410  }
411  }
412 
413  detail::archive_proxy<archive> archive::operator[](std::string const & path) {
414  return detail::archive_proxy<archive>(path, *this);
415  }
416 
417  void archive::construct(std::string const & filename, std::size_t props) {
419  detail::check_error(H5Eset_auto2(H5E_DEFAULT, NULL, NULL));
420  if (props & COMPRESS) {
421  unsigned int flag;
422  detail::check_error(H5Zget_filter_info(H5Z_FILTER_SZIP, &flag));
423  props &= (flag & H5Z_FILTER_CONFIG_ENCODE_ENABLED ? ~0x00 : ~COMPRESS);
424  }
425  if (ref_cnt_.find(file_key(filename, props & MEMORY)) == ref_cnt_.end())
426  ref_cnt_.insert(std::make_pair(
427  file_key(filename, props & MEMORY)
428  , std::make_pair(context_ = new detail::archivecontext(filename, props & WRITE, false/*props & REPLACE*/, props & COMPRESS, props & MEMORY), 1)
429  ));
430  else {
431  context_ = ref_cnt_.find(file_key(filename, props & MEMORY))->second.first;
432  context_->grant(props & WRITE, false/*props & REPLACE*/);
433  ++ref_cnt_.find(file_key(filename, props & MEMORY))->second.second;
434  }
435  }
436 
437  std::string archive::file_key(std::string filename, bool memory) const {
438  return (memory ? "m" : "_") + filename;
439  }
440 
441 #ifndef ALPS_SINGLE_THREAD
442  boost::recursive_mutex archive::mutex_;
443 #endif
444  std::map<std::string, std::pair<detail::archivecontext *, std::size_t> > archive::ref_cnt_;
445  }
446 }
447 
448 #undef ALPS_HDF5_FOREACH_NATIVE_TYPE_INTEGRAL
std::size_t dimensions(std::string path) const
Definition: archive.cpp:314
bool is_scalar(std::string path) const
Definition: archive.cpp:199
detail::archive_proxy< archive > operator[](std::string const &path)
Definition: archive.cpp:413
void open(const std::string &filename, const std::string &mode="r")
Definition: archive.cpp:104
bool is_attribute(std::string path) const
Definition: archive.cpp:180
bool is_data(std::string path) const
Definition: archive.cpp:170
void delete_data(std::string path) const
Definition: archive.cpp:364
void delete_group(std::string path) const
Definition: archive.cpp:376
#define ALPS_HDF5_LOCK_MUTEX
Definition: common.hpp:19
std::vector< std::string > list_attributes(std::string path) const
Definition: archive.cpp:273
virtual ~archive()
Definition: archive.cpp:66
void set_context(std::string const &context)
Definition: archive.cpp:148
void delete_attribute(std::string path) const
Definition: archive.cpp:388
std::vector< std::size_t > extent(std::string path) const
Definition: archive.cpp:291
#define ALPS_HDF5_FAKE_THREADSAFETY
Definition: common.hpp:25
void create_group(std::string path) const
Definition: archive.cpp:327
std::string get_context() const
Definition: archive.cpp:144
bool is_null(std::string path) const
Definition: archive.cpp:223
std::string encode_segment(std::string segment) const
Definition: archive.cpp:128
std::vector< std::string > list_children(std::string path) const
Definition: archive.cpp:259
auto write(std::string path, T const *value, std::vector< std::size_t > size, std::vector< std::size_t > chunk=std::vector< std::size_t >(), std::vector< std::size_t > offset=std::vector< std::size_t >()) const -> typename std::enable_if<!is_native_type< T >::value, void >::type
Definition: archive.hpp:172
void set_complex(std::string path)
Definition: archive.cpp:397
#define ALPS_STACKTRACE
Definition: stacktrace.hpp:37
bool is_group(std::string path) const
Definition: archive.cpp:189
std::string complete_path(std::string path) const
Definition: archive.cpp:153
std::string decode_segment(std::string segment) const
Definition: archive.cpp:136
std::string const & get_filename() const
Definition: archive.cpp:122
bool is_complex(std::string path) const
Definition: archive.cpp:242