1 <?php
2 /**
3 * A base for bulk loaders of content into the SilverStripe database.
4 * Bulk loaders give SilverStripe authors the ability to do large-scale uploads into their Sapphire databases.
5 *
6 * You can configure column-handling,
7 *
8 * @todo Add support for adding/editing has_many relations.
9 * @todo Add support for deep chaining of relation properties (e.g. Player.Team.Stats.GoalCount)
10 * @todo Character conversion
11 *
12 * @see http://rfc.net/rfc4180.html
13 * @package cms
14 * @subpackage bulkloading
15 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
16 */
abstract class BulkLoader extends ViewableData {

    /**
     * Each row in the imported dataset should map to one instance
     * of this class (with optional property translation
     * through {@link self::$columnMap}).
     *
     * @var string
     */
    public $objectClass;

    /**
     * Override this on subclasses to give the specific functions names.
     * Read by {@link self::Title()}.
     *
     * @var string
     */
    public static $title;

    /**
     * Map columns to DataObject-properties.
     * If not specified, we assume the first row
     * in the file contains the column headers.
     * The order of your array should match the column order.
     *
     * The column count should match the count of array elements,
     * fill with NULL values if you want to skip certain columns.
     *
     * You can also combine {@link $hasHeaderRow} = true and {@link $columnMap}
     * and omit the NULL values in your map.
     *
     * Supports one-level chaining of has_one relations and properties with dot notation
     * (e.g. Team.Title). The first part has to match a has_one relation name
     * (not necessarily the classname of the used relation).
     *
     * <code>
     * <?php
     * // simple example
     * array(
     *     'Title',
     *     'Birthday'
     * )
     *
     * // complex example
     * array(
     *     'first name' => 'FirstName', // custom column name
     *     null, // ignored column
     *     'RegionID', // direct has_one/has_many ID setting
     *     'OrganisationTitle', // create has_one relation to existing record using $relationCallbacks
     *     'street' => 'Organisation.StreetName', // match an existing has_one or create one and write property.
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $columnMap = array();

    /**
     * Find a has_one relation based on a specific column value.
     *
     * <code>
     * <?php
     * array(
     *     'OrganisationTitle' => array(
     *         'relationname' => 'Organisation', // relation accessor name
     *         'callback' => 'getOrganisationByTitle',
     *     ),
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $relationCallbacks = array();

    /**
     * Specifies how to determine duplicates based on one or more provided fields
     * in the imported data, matching to properties on the used {@link DataObject} class.
     * Alternatively the array values can contain a callback method (see example for
     * implementation details). The callback method should be defined on the source class.
     *
     * NOTE: If you're trying to get a unique Member record by a particular field that
     * isn't Email, you need to ensure that Member is correctly set to the unique field
     * you want, as it will merge any duplicates during {@link Member::onBeforeWrite()}.
     *
     * {@see Member::set_unique_identifier_field()}.
     *
     * If multiple checks are specified, the first one "wins".
     *
     * <code>
     * <?php
     * array(
     *     'customernumber' => 'ID',
     *     'phonenumber' => array(
     *         'callback' => 'getByImportedPhoneNumber'
     *     )
     * );
     * ?>
     * </code>
     *
     * @var array
     */
    public $duplicateChecks = array();

    /**
     * Delete ALL existing records of {@link self::$objectClass} before importing.
     * Evaluated by {@link self::load()}.
     *
     * @var boolean
     */
    public $deleteExistingRecords = false;

    /**
     * @param string $objectClass Name of the class each imported row maps to,
     *                            stored in {@link self::$objectClass}.
     */
    public function __construct($objectClass) {
        $this->objectClass = $objectClass;
        parent::__construct();
    }

    /**
     * Load the given file via {@link self::processAll()} and {@link self::processRecord()}.
     * Optionally removes all existing records of {@link self::$objectClass} before it
     * imports (see {@link self::$deleteExistingRecords}).
     *
     * Raises the execution time limit to one hour and the memory limit to 512M
     * before doing any work.
     *
     * @param string $filepath Path to the file we're importing, handed on to processAll()
     * @return BulkLoader_Result See {@link self::processAll()}
     */
    public function load($filepath) {
        ini_set('max_execution_time', 3600);
        increase_memory_limit_to('512M');

        if($this->deleteExistingRecords) {
            // Collect only the IDs first; each record is then fetched and removed
            // individually through its own delete() call.
            $q = singleton($this->objectClass)->buildSQL();
            $q->select = array('"ID"');
            $ids = $q->execute()->column('ID');
            foreach($ids as $id) {
                $obj = DataObject::get_by_id($this->objectClass, $id);
                // The record may already be gone by now (e.g. removed as a side effect
                // of deleting an earlier record); calling delete() on nothing would be fatal.
                if(!$obj) continue;

                $obj->delete();
                // destroy() + unset() release the object again - presumably to keep
                // memory usage down on large tables.
                $obj->destroy();
                unset($obj);
            }
        }

        return $this->processAll($filepath);
    }

    /**
     * Preview a file import (don't write anything to the database).
     * Useful to analyze the input and give the users a chance to influence
     * it through a UI.
     *
     * Not implemented yet: the base implementation always raises an E_USER_ERROR.
     *
     * @todo Implement preview()
     *
     * @param string $filepath Absolute path to the file we're importing
     * @return array See {@link self::processAll()}
     */
    public function preview($filepath) {
        user_error("BulkLoader::preview(): Not implemented", E_USER_ERROR);
    }

    /**
     * Process every record in the file
     *
     * @param string $filepath Absolute path to the file we're importing (with UTF8 content)
     * @param boolean $preview If true, we'll just output a summary of changes but not actually do anything
     * @return BulkLoader_Result A collection of objects which are either created, updated or deleted,
     *                           each optionally carrying a free-text message with more information
     *                           about the change.
     */
    abstract protected function processAll($filepath, $preview = false);


    /**
     * Process a single record from the file.
     *
     * @param array $record A map of the data, keyed by the header field defined in {@link self::$columnMap}
     * @param array $columnMap
     * @param BulkLoader_Result $result (passed as reference)
     * @param boolean $preview
     */
    abstract protected function processRecord($record, $columnMap, &$result, $preview = false);

    /**
     * Return a FieldSet containing all the options for this form; this
     * doesn't include the actual upload field itself.
     *
     * The base implementation is empty and returns nothing.
     */
    public function getOptionFields() {}

    /**
     * Return a human-readable name for this object.
     * It defaults to the class name and can be overridden by setting the static variable $title.
     *
     * @return string
     */
    public function Title() {
        return ($title = $this->stat('title')) ? $title : $this->class;
    }

    /**
     * Get a specification of all available columns and relations on the used model.
     * Useful for generation of spec documents for technical end users.
     *
     * Return Format:
     * <code>
     * array(
     *     'fields' => array('myFieldName'=>'myDescription'),
     *     'relations' => array('myRelationName'=>'myDescription'),
     * )
     * </code>
     *
     * 'fields' is the result of fieldLabels(false) on the model; 'relations' holds the
     * entries returned by has_one(), has_many() and many_many(), keyed by relation name.
     * The three relation lists are combined with the array union operator, so if the
     * same key occurs more than once the has_one entry wins over has_many, which wins
     * over many_many.
     *
     * @todo Mix in custom column mappings
     *
     * @return array
     **/
    public function getImportSpec() {
        // singleton() is looked up once and reused for all four calls below
        $model = singleton($this->objectClass);

        $spec = array();

        // get database columns (fieldlabels include fieldname as a key)
        // using $includerelations flag as false, so that it only contains $db fields
        $spec['fields'] = (array)$model->fieldLabels(false);

        $has_ones = $model->has_one();
        $has_manys = $model->has_many();
        $many_manys = $model->many_many();

        $spec['relations'] = (array)$has_ones + (array)$has_manys + (array)$many_manys;

        return $spec;
    }

    /**
     * Determines if a specific field is null.
     * Can be useful for unusual "empty" flags in the file,
     * e.g. a "(not set)" value.
     * The usual {@link DBField::isNull()} checks apply when writing the {@link DataObject},
     * so this is mainly a customization method.
     *
     * The base implementation treats every value for which empty() is true as null,
     * with the single exception of the string '0'. $fieldName is not used here; it is
     * available for subclasses that want per-field rules.
     *
     * @param mixed $val
     * @param string $fieldName Name of the field as specified in the array-values for {@link self::$columnMap}.
     * @return boolean
     */
    protected function isNullValue($val, $fieldName = null) {
        return (empty($val) && $val !== '0');
    }

}
256
257 /**
258 * Encapsulates the result of a {@link BulkLoader} import
259 * (usually through the {@link BulkLoader->processAll()} method).
260 *
261 * @todo Refactor to support lazy-loaded DataObjectSets once they are implemented.
262 *
263 * @package cms
264 * @subpackage bulkloading
265 * @author Ingo Schommer, Silverstripe Ltd. (<firstname>@silverstripe.com)
266 */
class BulkLoader_Result extends Object {

    /**
     * @var array Stores a map of ID and ClassNames
     * which can be reconstructed to DataObjects.
     * As imports can get large we just store enough
     * information to reconstruct the objects on demand.
     * Optionally includes a status message specific to
     * the import of this object. When the objects are reconstructed,
     * this message is set on them as the custom property "_BulkLoaderMessage".
     *
     * Example:
     * <code>
     * array(array('ID'=>1, 'ClassName'=>'Member', 'Message'=>'Updated existing record based on ParentID relation'))
     * </code>
     */
    protected $created = array();

    /**
     * @var array (see {@link $created})
     */
    protected $updated = array();

    /**
     * @var array (see {@link $created})
     */
    protected $deleted = array();

    /**
     * Stores the last change.
     * It is in the same format as one entry of {@link $created} but with an additional key,
     * "ChangeType", which will be set to one of 3 strings: "created", "updated", or "deleted"
     *
     * @var array
     */
    protected $lastChange = array();

    /**
     * Returns the count of all objects which were
     * created or updated (deleted objects are not included).
     *
     * @return int
     */
    public function Count() {
        return count($this->created) + count($this->updated);
    }

    /**
     * @return int Number of recorded "created" changes
     */
    public function CreatedCount() {
        return count($this->created);
    }

    /**
     * @return int Number of recorded "updated" changes
     */
    public function UpdatedCount() {
        return count($this->updated);
    }

    /**
     * @return int Number of recorded "deleted" changes
     */
    public function DeletedCount() {
        return count($this->deleted);
    }

    /**
     * Returns all created objects. Each object might
     * contain specific importer feedback in the "_BulkLoaderMessage" property.
     *
     * @return DataObjectSet
     */
    public function Created() {
        return $this->mapToDataObjectSet($this->created);
    }

    /**
     * Returns all updated objects (see {@link Created()}).
     *
     * @return DataObjectSet
     */
    public function Updated() {
        return $this->mapToDataObjectSet($this->updated);
    }

    /**
     * Returns all deleted objects that can still be looked up (see {@link Created()}).
     *
     * NOTE(review): entries whose record can no longer be fetched are skipped, so for
     * records that were really removed this set will presumably be empty - confirm
     * whether callers rely on {@link DeletedCount()} instead.
     *
     * @return DataObjectSet
     */
    public function Deleted() {
        return $this->mapToDataObjectSet($this->deleted);
    }

    /**
     * Returns the last change.
     * It is in the same format as one entry of {@link $created} but with an additional key,
     * "ChangeType", which will be set to one of 3 strings: "created", "updated", or "deleted"
     *
     * @return array Empty array if no change has been recorded yet
     */
    public function LastChange() {
        return $this->lastChange;
    }

    /**
     * @param $obj DataObject
     * @param $message string
     */
    public function addCreated($obj, $message = null) {
        $this->created[] = $this->recordChange($obj, $message, 'created');
    }

    /**
     * @param $obj DataObject
     * @param $message string
     */
    public function addUpdated($obj, $message = null) {
        $this->updated[] = $this->recordChange($obj, $message, 'updated');
    }

    /**
     * @param $obj DataObject
     * @param $message string
     */
    public function addDeleted($obj, $message = null) {
        $this->deleted[] = $this->recordChange($obj, $message, 'deleted');
    }

    /**
     * Builds the compact ID/ClassName/Message entry for an object and remembers
     * it (plus the given change type) as the last change.
     *
     * @param $obj DataObject
     * @param $message string
     * @param $changeType string One of "created", "updated" or "deleted"
     * @return array The entry without the "ChangeType" key, ready to be appended
     *               to one of the change lists
     */
    protected function recordChange($obj, $message, $changeType) {
        $change = array(
            'ID' => $obj->ID,
            'ClassName' => $obj->class,
            'Message' => $message
        );

        // lastChange carries the same data, with ChangeType appended as the final key
        $this->lastChange = $change;
        $this->lastChange['ChangeType'] = $changeType;

        return $change;
    }

    /**
     * Reconstructs the objects described by a list of ID/ClassName/Message entries.
     * Entries whose record cannot be found are left out of the returned set.
     *
     * @param $arr Array containing ID and ClassName maps
     * @return DataObjectSet
     */
    protected function mapToDataObjectSet($arr) {
        $set = new DataObjectSet();
        foreach($arr as $arrItem) {
            $obj = DataObject::get_by_id($arrItem['ClassName'], $arrItem['ID']);
            // Check before touching the object: assigning a property on an empty
            // lookup result would either auto-create a bogus stdClass or fail outright.
            if(!$obj) continue;

            $obj->_BulkLoaderMessage = $arrItem['Message'];
            $set->push($obj);
        }

        return $set;
    }

}
421 ?>