
Results

Result

Bases: BaseModel

Creates a data model for a result and generates a unique result hash.

Source code in lume_services/results/generic.py
class Result(BaseModel):
    """Creates a data model for a result and generates a unique result hash."""

    project_name: str = Field(
        "local", alias="collection"
    )  # this will be the project_name for the scheduled flow

    # database id
    id: Optional[str] = Field(alias="_id", exclude=True)

    # db fields
    flow_id: str
    inputs: Dict[str, Union[float, str, np.ndarray, list, pd.DataFrame]]
    outputs: Dict[str, Union[float, str, np.ndarray, list, pd.DataFrame]]
    date_modified: datetime = Field(default_factory=datetime.utcnow)

    # set of fields that establishes uniqueness
    unique_on: List[str] = Field(
        ["inputs", "outputs", "flow_id"], alias="index", exclude=True
    )

    # establishes uniqueness
    unique_hash: str

    # store result type
    result_type_string: str

    class Config:
        arbitrary_types_allowed = True
        json_encoders = JSON_ENCODERS
        allow_population_by_field_name = True
        extra = Extra.forbid

    _round_datetime_to_milliseconds = validator(
        "date_modified", allow_reuse=True, always=True, pre=True
    )(round_datetime_to_milliseconds)

    @validator("inputs", pre=True)
    def validate_inputs(cls, v):
        return load_db_dict(v)

    @validator("outputs", pre=True)
    def validate_outputs(cls, v):
        return load_db_dict(v)

    @root_validator(pre=True)
    def validate_all(cls, values):
        unique_fields = cls.__fields__["unique_on"].default

        # If flow_id is not passed, check prefect context
        if not values.get("flow_id"):
            if not hasattr(prefect_context, "flow_id"):
                raise ValueError("No flow_id passed to result")

            values["flow_id"] = prefect_context.flow_id

        if not values.get("collection") and not values.get("project_name"):
            if not hasattr(prefect_context, "project_name"):
                logger.warning("No project_name passed to result")

            else:
                values["project_name"] = prefect_context.project_name

        # create index hash
        if not values.get("unique_hash"):

            for field in unique_fields:
                if not values.get(field):
                    raise ValueError("%s not provided.", field)

            values["unique_hash"] = fingerprint_dict(
                {index: values[index] for index in unique_fields}
            )

        if values.get("_id"):
            _id = values["_id"]
            if isinstance(_id, (ObjectId,)):
                values["_id"] = str(values["_id"])

        values["result_type_string"] = f"{cls.__module__}.{cls.__name__}"

        return values

    def get_unique_result_index(self) -> dict:
        return {field: getattr(self, field) for field in self.unique_on}

    @inject
    def insert(
        self, results_db_service: ResultsDB = Provide[Context.results_db_service]
    ):

        # must convert to jsonable dict
        rep = self.get_db_dict()
        return results_db_service.insert_one(rep)

    @classmethod
    @inject
    def load_from_query(
        cls,
        project_name: str,
        query: dict,
        results_db_service: ResultsDB = Provide[Context.results_db_service],
    ):
        query = get_bson_dict(query)
        res = results_db_service.find(collection=project_name, query=query)

        if len(res) == 0:
            raise ValueError(f"Provided query returned no results: {query}")

        elif len(res) > 1:
            raise ValueError(f"Provided query returned multiple results: {query}")

        values = load_db_dict(res[0])
        return cls(project_name=project_name, **values)

    def unique_rep(self) -> dict:
        """Get minimal representation needed to load result object from database."""

        return {
            "project_name": self.project_name,
            "result_type_string": self.result_type_string,
            "query": {"unique_hash": self.unique_hash},
        }

    def get_db_dict(self) -> dict:
        rep = self.dict(by_alias=True)
        return get_bson_dict(rep)
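
For orientation, here is a minimal usage sketch. The project name, flow id, inputs, and outputs are hypothetical, and insert() assumes a results database service has been configured for injection:

from lume_services.results.generic import Result

result = Result(
    project_name="my-project",     # hypothetical project name
    flow_id="example-flow-id",     # hypothetical Prefect flow id
    inputs={"x": 1.0},
    outputs={"y": 2.0},
)

# the root validator fingerprints inputs, outputs, and flow_id
print(result.unique_hash)

# requires a configured results database service
result.insert()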

unique_rep()

Get minimal representation needed to load result object from database.

Source code in lume_services/results/generic.py
def unique_rep(self) -> dict:
    """Get minimal representation needed to load result object from database."""

    return {
        "project_name": self.project_name,
        "result_type_string": self.result_type_string,
        "query": {"unique_hash": self.unique_hash},
    }
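
The returned dictionary pairs the project name and result type with a query on the unique hash. For example (values hypothetical):

result.unique_rep()
# {
#     "project_name": "my-project",
#     "result_type_string": "lume_services.results.generic.Result",
#     "query": {"unique_hash": "<unique hash>"},
# }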

get_bson_dict(dictionary)

Recursively converts numpy arrays inside a dictionary to BSON-encoded items and pandas DataFrames to JSON representations.

Parameters:

    dictionary (dict): Dictionary to convert. Required.

Returns:

    dict

Source code in lume_services/results/generic.py
def get_bson_dict(dictionary: dict) -> dict:
    """Recursively converts numpy arrays inside a dictionary to bson encoded items and
    pandas dataframes to json reps.

    Args:
        dictionary (dict): Dictionary to load.

    Returns
        dict
    """

    def convert_values(dictionary):
        """Convert values to list so the dictionary can be inserted and loaded."""

        # convert numpy arrays to binary format
        dictionary = {
            key: Binary(pickle.dumps(value, protocol=2))
            if isinstance(value, (np.ndarray,))
            else value
            for key, value in dictionary.items()
        }

        # convert pandas DataFrames to JSON strings
        dictionary = {
            key: value.to_json() if isinstance(value, (pd.DataFrame,)) else value
            for key, value in dictionary.items()
        }

        # convert File objects to their jsonable representations
        dictionary = {
            key: value.jsonable_dict() if isinstance(value, (File,)) else value
            for key, value in dictionary.items()
        }

        dictionary = {
            key: convert_values(value) if isinstance(value, (dict,)) else value
            for key, value in dictionary.items()
        }
        return dictionary

    return convert_values(dictionary)
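
As a sketch of the conversion (keys and values are arbitrary): numpy arrays become pickled bson.Binary values, DataFrames become JSON strings, and nested dictionaries are converted recursively:

import numpy as np
import pandas as pd
from bson import Binary

doc = get_bson_dict(
    {
        "array": np.arange(3),
        "frame": pd.DataFrame({"a": [1, 2]}),
        "nested": {"array": np.zeros(2)},
    }
)

assert isinstance(doc["array"], Binary)             # pickled numpy array
assert isinstance(doc["frame"], str)                # DataFrame as JSON string
assert isinstance(doc["nested"]["array"], Binary)   # recursion into nested dicts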

load_db_dict(dictionary)

Loads a MongoDB dictionary representation using the appropriate Python classes. Numpy arrays are loaded from binary objects and pandas DataFrames from JSON blobs.

Parameters:

    dictionary (dict): Dictionary to load. Required.
Source code in lume_services/results/generic.py
def load_db_dict(dictionary: dict):
    """Loads representation of mongodb dictionary with appropriate python classes.
    Numpy arrays are loaded from binary objects and pandas dataframes from json blobs.

    Args:
        dictionary (dict): Dictionary to load.

    """

    def check_and_convert_json_str(string: str):
        try:

            loaded_ = json.loads(string)
            return pd.DataFrame(loaded_)

        except json.JSONDecodeError:
            return string

    def convert_values(dictionary):
        if "file_type_string" in dictionary:
            file_type = get_file_from_serializer_string(dictionary["file_type_string"])
            return file_type(**dictionary)

        # convert numpy arrays from binary format
        dictionary = {
            key: pickle.loads(value) if isinstance(value, (bytes,)) else value
            for key, value in dictionary.items()
        }

        # convert JSON strings back to pandas DataFrames
        dictionary = {
            key: check_and_convert_json_str(value)
            if isinstance(value, (str,))
            else value
            for key, value in dictionary.items()
        }

        dictionary = {
            key: convert_values(value) if isinstance(value, (dict,)) else value
            for key, value in dictionary.items()
        }

        return dictionary

    return convert_values(dictionary)
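
A round-trip sketch with get_bson_dict (keys and values are arbitrary):

import numpy as np
import pandas as pd

original = {"array": np.arange(3), "frame": pd.DataFrame({"a": [1, 2]})}
loaded = load_db_dict(get_bson_dict(original))

assert np.array_equal(loaded["array"], original["array"])
# note: the DataFrame index dtype may change across the JSON round trip
assert list(loaded["frame"]["a"]) == [1, 2]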

round_datetime_to_milliseconds(time)

MongoDB rounds datetimes to milliseconds, so round on assignment for consistency.

Source code in lume_services/results/generic.py
def round_datetime_to_milliseconds(time: Union[datetime, str]) -> str:
    """MongoDB rounds datetimes to milliseconds, so round on assignment for
    consistency.

    """
    if isinstance(time, datetime):
        time = time.isoformat(timespec="milliseconds")
    return time
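
For example, a datetime with microsecond precision comes back as an ISO string truncated to milliseconds, which pydantic then re-parses into a datetime (since the validator runs with pre=True):

from datetime import datetime

round_datetime_to_milliseconds(datetime(2022, 1, 1, 12, 0, 0, 123456))
# '2022-01-01T12:00:00.123'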

ImpactResult

Bases: Result

Extends the Result base and implements Impact-specific attributes.

Source code in lume_services/results/impact.py
class ImpactResult(Result):
    """Extends Result base and implements Impact specific attributes"""

    plot_file: Optional[ImageFile]
    archive: HDF5File
    pv_collection_isotime: datetime
    config: dict

    _round_datetime_to_milliseconds = validator(
        "pv_collection_isotime", allow_reuse=True, always=True, pre=True
    )(round_datetime_to_milliseconds)

    @validator("plot_file", pre=True)
    def validate_plot_file(cls, v):

        # if the plot file is passed as a dictionary, load the file type
        if isinstance(v, dict):
            return ImageFile(**v)

        return v

    @validator("archive", pre=True)
    def validate_archive_file(cls, v):

        # if the archive is passed as a dictionary, load the file type
        if isinstance(v, dict):
            return HDF5File(**v)

        return v
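
A construction sketch follows. The flow id, inputs, outputs, and config are hypothetical, and archive is assumed to be an HDF5File instance built elsewhere (see the lume-services file types for the actual constructor signatures):

from datetime import datetime

impact_result = ImpactResult(
    flow_id="example-flow-id",        # hypothetical
    inputs={"gun_phase": 0.0},        # hypothetical
    outputs={"end_sigma_x": 1.2e-4},  # hypothetical
    archive=hdf5_file,                # an HDF5File instance, constructed elsewhere
    pv_collection_isotime=datetime.utcnow(),
    config={},
)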