[{"data":1,"prerenderedAt":472},["ShallowReactive",2],{"footer-primary":3,"footer-secondary":93,"footer-description":119,"request-review-json-filtering":121,"request-review-json-filtering-next":197,"sales-reps":220},{"items":4},[5,29,49,69],{"id":6,"title":7,"url":8,"page":8,"children":9},"522e608a-77b0-4333-820d-d4f44be2ade1","Solutions",null,[10,15,20,25],{"id":11,"title":12,"url":8,"page":13},"fcafe85a-a798-4710-9e7a-776fe413aae5","Headless CMS",{"permalink":14},"/solutions/headless-cms",{"id":16,"title":17,"url":8,"page":18},"79972923-93cf-4777-9e32-5c9b0315fc10","Backend-as-a-Service",{"permalink":19},"/solutions/backend-as-a-service",{"id":21,"title":22,"url":8,"page":23},"0fa8d0c1-7b64-4f6f-939d-d7fdb99fc407","Product Information",{"permalink":24},"/solutions/product-information-management",{"id":26,"title":27,"url":28,"page":8},"63946d54-6052-4780-8ff4-91f5a9931dcc","100+ Things to Build","https://directus.io/blog/100-tools-apps-and-platforms-you-can-build-with-directus",{"id":30,"title":31,"url":8,"page":8,"children":32},"8ab4f9b1-f3e2-44d6-919b-011d91fe072f","Resources",[33,37,41,45],{"id":34,"title":35,"url":36,"page":8},"f951fb84-8777-4b84-9e91-996fe9d25483","Documentation","https://docs.directus.io",{"id":38,"title":39,"url":40,"page":8},"366febc7-a538-4c08-a326-e6204957f1e3","Guides","https://docs.directus.io/guides/",{"id":42,"title":43,"url":44,"page":8},"aeb9128e-1c5f-417f-863c-2449416433cd","Community","https://directus.chat",{"id":46,"title":47,"url":48,"page":8},"da1c2ed8-0a77-49b0-a903-49c56cb07de5","Release Notes","https://github.com/directus/directus/releases",{"id":50,"title":51,"url":8,"page":8,"children":52},"d61fae8c-7502-494a-822f-19ecff3d0256","Support",[53,57,61,65],{"id":54,"title":55,"url":56,"page":8},"8c43c781-7ebd-475f-a931-747e293c0a88","Issue Tracker","https://github.com/directus/directus/issues",{"id":58,"title":59,"url":60,"page":8},"d77bb78e-cf7b-4e01-932a-514414ba49d3","Feature Requests","https://github.com/directus/directus/discussions?discussions_q=is:open+sort:top",{"id":62,"title":63,"url":64,"page":8},"4346be2b-2c53-476e-b53b-becacec626a6","Community Chat","https://discord.com/channels/725371605378924594/741317677397704757",{"id":66,"title":67,"url":68,"page":8},"26c115d2-49f7-4edc-935e-d37d427fb89d","Cloud Dashboard","https://directus.cloud",{"id":70,"title":71,"url":8,"page":8,"children":72},"49141403-4f20-44ac-8453-25ace1265812","Organization",[73,78,84,88],{"id":74,"title":75,"url":76,"page":77},"1f36ea92-8a5e-47c8-914c-9822a8b9538a","About","/about",{"permalink":76},{"id":79,"title":80,"url":81,"page":82},"b84bf525-5471-4b14-a93c-225f6c386005","Careers","#",{"permalink":83},"/careers",{"id":85,"title":86,"url":87,"page":8},"86aabc3a-433d-434b-9efa-ad1d34be0a34","Brand Assets","https://drive.google.com/drive/folders/1lBOTba4RaA5ikqOn8Ewo4RYzD0XcymG9?usp=sharing",{"id":89,"title":90,"url":8,"page":91},"8d2fa1e3-198e-4405-81e1-2ceb858bc237","Contact",{"permalink":92},"/contact",{"items":94},[95,101,107,113],{"id":96,"title":97,"url":8,"page":98,"children":100},"8a1b7bfa-429d-4ffc-a650-2a5fdcf356da","Cloud 
Policies",{"permalink":99},"/cloud-policies",[],{"id":102,"title":103,"url":81,"page":104,"children":106},"bea848ef-828f-4306-8017-6b00ec5d4a0c","License",{"permalink":105},"/bsl",[],{"id":108,"title":109,"url":81,"page":110,"children":112},"4e914f47-4bee-42b7-b445-3119ee4196ef","Terms",{"permalink":111},"/terms",[],{"id":114,"title":115,"url":81,"page":116,"children":118},"ea69eda6-d317-4981-8421-fcabb1826bfd","Privacy",{"permalink":117},"/privacy",[],{"description":120},"\u003Cp>A composable backend to build your Headless CMS, BaaS, and more.&nbsp;\u003C/p>",{"id":122,"slug":123,"vimeo_id":124,"description":125,"tile":126,"length":127,"resources":128,"people":132,"episode_number":142,"published":143,"title":144,"video_transcript_html":145,"video_transcript_text":146,"content":8,"status":147,"episode_people":148,"recommendations":177,"season":178,"seo":8},"daed2c08-703a-43d6-ac97-aacac61be4c0","json-filtering","903011547","In this recording of our live event on January 11 2024, Rijk, Jonathan, and Daniel discuss filtering inside of stored JSON objects. ","b2035eee-e7c0-44b0-80f6-ba0f7d5fbe37",52,[129],{"name":130,"url":131},"GitHub Discussion","https://github.com/directus/directus/discussions/7277",[133,136,139],{"name":134,"url":135},"Rijk van Zanten","https://directus.io/team/rijk-van-zanten",{"name":137,"url":138},"Jonathan Wagner","https://directus.io/team/jonathan-wagner",{"name":140,"url":141},"Daniel Biegler","https://directus.io/team/daniel-biegler",1,"2024-01-18","Filtering of JSON Objects","\u003Cp>Speaker 0: Welcome, everyone. Happy 2024. We're excited for a new year. We're gonna try out a new well, we've been we've been playing around with this request for views format for a little while, but we're gonna we're gonna try out just covering a specific feature, talking through some details, making sure that we have a full specification on it, and getting additional community feedback, as needed. Today's topic will be JSON object filtering.\u003C/p>\u003Cp>It's been a it's a very, very popular request that we see quite frequently, and we would love to ensure that we solve this problem correctly and get it working for the databases that support it. I think one of the key issues that we've run into we've done a lot of development work on this, but what we run into is various database vendors support this differently. You fix it to support that thing. It breaks something else. We spent a lot of time kind of iterating back and forth on this feature, and I'll let Rai talk about some of that.\u003C/p>\u003Cp>But I mean, fundamentally, we we know that this is important. We consider it kind of a critical road map item, and that's the reason we're gonna talk in detail about it today.\u003C/p>\u003Cp>Speaker 1: Absolutely. Well, thank you for that. Yeah. And as you can see, I was the one who opened this feature request conveniently with no details whatsoever. I basically just say, hey, man.\u003C/p>\u003Cp>I wanna be able to filter in JSON objects. Good luck. And then everybody was like, yep. Me too. However, you know, the this this was done way before we had that RFC format, so the the the details are lacking.\u003C/p>\u003Cp>Let's let's call it that. So before we dive in too deeply, you know, Daniel, I'm just gonna throw you under the bus here. 
You wanna walk us through what even is JSON filtering in the first place?

Speaker 2: JSON filtering can be quite useful if you store JSON inside of your database and you need to check some field inside of that JSON. Like we said in the beginning, not every database supports this as of right now, which forces us, or will force us, to do some little Directus magic as usual. But more and more databases are jumping on the train, which is pretty neat. I think SQLite very recently announced that they have JSONB support, I think. I hope I'm not misremembering. But yeah, it can be quite useful: if you have stored JSON, then you can filter on fields inside of that JSON. That's very useful.

Speaker 0: And as Kevin points out, a couple of our default interface configurations, our field configurations, actually store in JSON by default. You can change them in most cases, and currently that's the recommendation. On the support side of things today, for those of you listening: if you use the CSV format instead, on the database field settings side, you can then filter through the application and the API on those fields. But JSON is a little bit nicer structured format, hence the request.

Speaker 1: Yeah. And the other big elephant in the room, of course, is that a lot of systems are utilizing, or could be utilizing, more of a document-style structure rather than a tabular-style data structure. Especially when you have flexible schemas, data structures that are unknown ahead of time, or semi-structured data. Think about blocks on a page or something: you're talking about rich data that may or may not be structured like a table. So storing JSON in a Postgres database or another SQL database can give you some of that document magic without having to switch over completely to a document database. So there are a lot of benefits to having this. Now that being said, it also comes with a ton of complexities. In my original feature request, I was really thinking about it as a filter against the data, similar to how we have some functions to run against date values. I don't know if you've seen those before, but we have things like "extract the year from a timestamp" as a function. I was thinking about it the same way in my original feature request. So just like you'd be able to do year(timestamp) equals 2024, you could do something like json(name of the field) with some sort of identifier string to select something from that field, and then run filters against the value that you've now selected. Similarly, you'd be able to use that in fields, or in sort, or other pieces like that.
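For illustration, here's roughly what that function-style idea looks like next to the date functions Directus already has. The year() filter below is real, existing Directus filter syntax; the json() function and its path selector are a hypothetical sketch of what was being proposed, not a shipped API.

```ts
// Existing Directus behavior: datetime functions can be used inside filters,
// e.g. "give me articles published in 2024".
// GET /items/articles?filter={"year(date_published)":{"_eq":2024}}
const dateFilter = {
  "year(date_published)": { _eq: 2024 }, // date_published is an example field
};

// The idea from the feature request (hypothetical, never shipped in this
// form): a json() function that selects a value out of a stored JSON field,
// with the regular filter operators then running against that value.
const jsonFilter = {
  "json(metadata, $.author.name)": { _eq: "Rijk" },
};
```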
Speaker 1: And we actually put a lot of work into that already, and by "we", I mean Tim, who's in the chat, and I'm definitely gonna put him on stage and put him in the spotlight. Jonathan, if you wanna pull that up real quick, it might actually be fun to take a quick scroll through. Also, as a way to answer "do all DBs even support it?", there's a wonderful table at the top of that pull request. If you wanna pull up the... don't bring him up, we're recording. He's a little camera shy, but we can figure it out.

Speaker 0: You know the PR off the top of your head, or do I just need to go find it?

Speaker 1: Oh, just open up pull requests, and if you search for JSON, it should be the only one that's there.

Speaker 2: GitHub...

Speaker 0: ...pull requests.

Speaker 1: We've left it open intentionally; we'll circle back later to how we're handling it now. It's probably all the way at the bottom, it's a little older. Yep. There it is.

Speaker 2: Alright.

Speaker 0: There's your matrix.

Speaker 1: Right. So in this PR, we basically took a swing at implementing JSON filtering the way it was described in that discussion. And by "described", I mean vaguely hinted at, because the description was a little poor. But we did implement it in that way, and it does actually work. So we do have queries for all the different databases. But as you can see in this table, to the chat's point earlier, not everything is supported everywhere, which becomes tricky immediately. Because how do you then document it? Now it's gonna be different per database. So there has been some fallback support implemented, and you can see the difference in that green check versus that sort of Unicode check, I guess. For a lot of these things, we're actually having to do some Directus magic to make that work against the database, which is not necessarily gonna be the most performant or, what's the right word, the quote-unquote right way to do it. It's a blue check mark. Oh, if you're on Windows, just pretend. If you're on Linux, just pretend the check marks are green. Anyways, while building this... if you wanna pull up the files section of this PR, Jonathan, I think it's a fun scroll through. If we go all the way to the top, there's a tab: Files changed.

Speaker 2: 2,000 additions. Oh my god.

Speaker 1: Yeah. There's only a thousand lines across 50 files. The long story short is that in the database helpers, if you see in that left-hand sidebar, we have to add all of the additional queries for JSON filtering ourselves, for all the different database types, sort of hard-coded in. You see them here in the dialects, if you just click one of those; it doesn't really matter which one. So you can see them here: we have a JSON_EXTRACT for MariaDB that may or may not exist across the other database vendors. It's not a SQL standard, which makes this a heck of a lot more complicated than you'd think, because every database does it differently. I believe it's Postgres that doesn't even rely on functions; it has a special syntax with builders and arrows and whatnot, which is pretty interesting. I don't know if the first one has that in there. But anyways, maybe that's the one with the question marks there, I don't know. There are all sorts of different syntaxes is what I'm trying to say. Oh, here they are, the ones with the arrow, the dash-arrow-arrow. That kind of stuff.
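To make the dialect problem concrete: extracting the same nested value is spelled differently on every vendor. A rough sketch with illustrative SQL fragments (the PR's actual generated SQL may differ), wired through Knex since that's the query builder the API sits on:

```ts
import knex from "knex";

// One logical operation, "read metadata.author.name as text", per dialect.
const jsonValueSql: Record<string, string> = {
  // MySQL / MariaDB: JSON_EXTRACT plus JSON_UNQUOTE (or the ->> shorthand)
  mysql: `JSON_UNQUOTE(JSON_EXTRACT(metadata, '$.author.name'))`,
  // Postgres: operator syntax, -> for a JSON value, ->> for text
  postgres: `metadata -> 'author' ->> 'name'`,
  // SQLite: json_extract, available when the JSON functions are compiled in
  sqlite: `json_extract(metadata, '$.author.name')`,
  // SQL Server and Oracle both use JSON_VALUE, each with its own path rules
  mssql: `JSON_VALUE(metadata, '$.author.name')`,
  oracledb: `JSON_VALUE(metadata, '$.author.name')`,
};

// Illustrative connection string; swap in your own.
const db = knex({ client: "pg", connection: "postgres://user:pass@localhost/db" });

async function findByAuthor(name: string) {
  // Postgres flavor here; a real implementation would pick the snippet
  // matching the active dialect, which is exactly the maintenance burden
  // being described.
  return db("articles").whereRaw(`${jsonValueSql.postgres} = ?`, [name]);
}
```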
Speaker 1: So what we realized with this initial work that we did on JSON filtering is, well, A: it's super complicated, as you can tell. B: there's a lot of additional work and a lot of additional logic, which in turn you're gonna have to maintain; as always, the more stuff you add, the harder it becomes to maintain. But the third thing is that we also started to wonder: okay, instead of having it as a function-style thing in the query parameter, what if we do it more like the fields parameter itself? Where, like in GraphQL, for example, you could just provide a nested tree that you wanna select, instead of having to do it through a filter attribute. And at the same time, we've been working on a new data abstraction engine in the first place. It doesn't necessarily fix the fact that we have to do a lot of stuff ourselves for every database, because it's just database specific, but it does it in a way that is designed to have database-specific drivers rather than dialect-specific overrides. So this is gonna be a bit more of a deep dive, but the way the API is set up right now is that everything effectively goes through Knex, right, the SQL query builder that basically everybody and their mom is using. What that means is that a query first becomes sort of generic SQL, and then at the very end it's translated for the individual vendors. This is a very crude explanation, but it's effectively just doing a find-and-replace for the quotes, making sure it's the right quote character for the right database. And then for some of the databases there's a little bit of additional magic; like for SQLite, for an ALTER TABLE statement, there's some magic included there. But the long story short is that adding stuff like JSON filtering becomes tricky, because now it's database specific. We don't really have a way, and it was the same with the timestamp helpers that we did earlier, we don't really have a way to make that agnostic across all of the different database vendors. The second part there is that we know that we wanna support more database vendors over time, not less. So trying to do it in this "make it generic first, add one-by-one overrides to the dialects" way doesn't really scale anymore. "And the versions": a very good point, Tim. Because I think if we go back to the table, we already saw it: there are differences between MySQL 5 and MySQL 8 plus, although 5 is now end of life, so that's a whole different discussion. And the same for Postgres 10 versus 13 and up. So the way we're re-architecting that piece is by saying: there's still a singular data entry point, but rather than relying on SQL, it relies on an abstract syntax tree of our own design. Just a proprietary data format that explains to the engine what the data we wanna fetch looks like. And then for each of the different vendors, we're gonna have a driver that interprets that command and executes it in whatever way is appropriate for that driver. So for a lot of the SQL drivers, we can still share a lot of that SQL magic like we're doing now.
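A minimal sketch of the driver idea being described, with invented names (the real abstraction is still in R&D and will differ): one abstract query format at the entry point, and per-store drivers that each interpret it natively.

```ts
// Hypothetical shapes, for illustration only.
interface AbstractQuery {
  collection: string;
  // Nested field selection; a node may descend into a JSON value the same
  // way it descends into a relation.
  fields: Array<string | { field: string; children: AbstractQuery["fields"] }>;
  filter?: Record<string, unknown>;
}

interface DataDriver {
  // Each driver interprets the abstract query however suits its store.
  query(ast: AbstractQuery): Promise<Record<string, unknown>[]>;
}

class PostgresDriver implements DataDriver {
  async query(ast: AbstractQuery) {
    // ...compile `ast` to SQL, using ->/->> for JSON path segments...
    return [];
  }
}

class MongoDriver implements DataDriver {
  async query(ast: AbstractQuery) {
    // ...a nested JSON selection maps almost one-to-one onto a document
    // query, no SQL dialect layer involved...
    return [];
  }
}
```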
Speaker 1: But it also means that we can start opening up the doors to other data sources, including JSON-specific data sources. One of the reasons there, to keep going on that train of thought, is that once we started getting super deep into JSON filtering, we also started to realize: well, if we implement JSON filtering as field selection rather than as filtering-specific functions, you end up with effectively just drivers for a document data store. Which is very interesting. So with that in mind, we could also start thinking about what it looks like to use something like DynamoDB or MongoDB, or some other key-value slash unstructured document-style store, with the Directus API. Importantly, not treating them as a relational database, but treating them as a document data store: leaning into the flexibility of a document database rather than trying to force them into a relational structure, like we've seen in the past and have explicitly avoided. That was a very long train of thought, but what I'm trying to say is that we've effectively, not shelved, but sort of put a pause on this particular PR, and we're instead focusing on implementing this JSON selection support directly in this driver-based approach. Because the one thing we didn't wanna do is add more complexity now, only to replace it in a couple of months with another breaking change with a completely different structure. But we've left the PR open, because we definitely don't wanna lose any of the work or any of the code; it is being repurposed into the new structure. In the chat: "they would all be called features, not bugs." That's very true. Any bug in SQL is a feature.

Speaker 0: Got a question about the specs for the drivers. I believe we do plan to fully spec and document those driver interfaces, right, for the new architecture?

Speaker 1: Absolutely. Yes. I mean, as of right now it's still very much in R&D, so we're not opening the doors quite yet on building your own, but it is built basically with extensions in mind. So we wanna make sure that those things can do whatever you want: as long as you adhere to the spec, you can save and read data from wherever the f you want. But we're definitely focusing on feature parity with what we have first, and then expanding the scope there with JSON filtering and some other additional relationship types. And then, you know...

Speaker 0: Yeah. I love the idea that we're moving towards an extension-driven driver approach, the same way we've done with so many other components of the platform, making it extensible. It also means that if you've got a custom data source, an API, or other things, you'd have a spec and be able to build your own driver against that custom data source, and be able to leverage the API and the power of the Directus application on top of it. So, very exciting 2024.

Speaker 2: Absolutely. Wait for the first person implementing the Excel sheet data store. I think this will be a smash hit in Germany.
All of the companies love Excel so much. Oh, maybe... the whole finance world rejoices. Oh.

Speaker 1: We should start that as a little competition. Whoever builds the Excel data store first gets a shout-out on the website.

Speaker 2: Oh, you bet.

Speaker 1: A signed certificate of insanity, by me. Love that.

Speaker 2: The very worst thing is that people will actually, honestly, use it, I'm afraid.

Speaker 1: It's not the using-it part that worries me. It's the relying-on-it part. Anyhoo, circling back to more of the specifics of JSON filtering as a whole. Just to circle back to a requirements list, because we don't have an RFC for JSON filtering proper, and we don't really have an RFC for what it could look like in new formats. We do know that we want to support it from a field selection perspective, with that sort of unstructured data store in mind. We do know that we have to support it as part of filters and sorting and querying and all of that good stuff. That being said, there is a very interesting difference between JSON objects, which is sort of the assumed default that we've been talking about here, and arrays, which is where it gets real complicated real quick. Because one of the main use cases this is coming from is fields like tags, where you just have a JSON list of individual strings, and then: how do you search through those? Now you're not so much talking about making a nested selection of a JSON path and filtering against that; now you have to search through each item of the array. We do have some specific magic going on for one-to-manys right now, like _some and _none, for example, to say "I want all of the values in my related table to match XYZ". We should probably add something along those lines for JSON when it comes to filtering specifically.
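A sketch of what that could look like if the existing one-to-many operators were extended to JSON arrays. _some and _none are real Directus filter operators for relational fields; applying them to plain JSON fields, as below, is hypothetical.

```ts
// Hypothetical: tags is a JSON column holding an array of strings.
// "At least one element of tags equals 'directus'."
const tagsFilter = {
  tags: { _some: { _eq: "directus" } },
};

// The "diverging" example from the discussion: every object in a JSON array
// has a nested author.age that must be greater than 12, phrased here as
// "none may be 12 or lower". reviews is an illustrative field name.
const reviewsFilter = {
  reviews: { _none: { "author.age": { _lte: 12 } } },
};
```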
Speaker 2: I think...

Speaker 1: This is mostly, again, for those who've joined us in the past on these live sessions (and for Rich, welcome back): the goal is oftentimes to really diverge, to find the boundaries of what we can or eventually may want to do with this, and then converge back into what is realistic and what that first MVP looks like. This is really that divergent-thinking stage. Like, how far do we need to go when it comes to filtering on stored values? For example, do you need to be able to say things like: I want all of the JSON objects in an array to have a nested property author.age, and they all need to be bigger than 12?

Speaker 2: I've actually never used, like, field-type querying, like in Postgres or in the others. So I'm curious: is there a function of the database that tells you whether or not the stored value is an array? Like, is there some database-layer check that tells you that? Is that possible?

Speaker 1: I think the realistic answer is that we can't assume that there is, because we're talking about...

Speaker 2: Good plan. Good plan.

Speaker 1: We're talking about various different database vendors, and we're talking about various SQL-like vendors that are sort of SQL-inspired but not fully compliant. Think of, I wanna say PlanetScale, but they recently added foreign keys, so they might be more compliant than they used to be. But those types of vendors, where they use the SQL syntax for basic querying but don't have a full SQL engine behind it, because they implemented the data store differently. So the answer is: we don't know, nor can we know. Because we need to build this and design the specs for this in a way that is agnostic to the data store. Tim was mentioning we have three different pieces of functionality: extracting the data in fields, extracting the data from a field and then using it to filter, and then deep filtering against stuff within that JSON blob. Filtering inside the J... oh, yeah, filtering inside of an array with deep. Yeah.

Speaker 2: So...

Speaker 1: So I think, ideally, and this is a bit of a different way of thinking in how we treat the API, because it used to be very tabular-data-first: what we're leaning towards now is really treating JSON values as if they're just any other table, effectively. We treat an object as if it's any other item in the database, we treat an array as if it's any other table in the database, and therefore we just allow you to use any of the regular query parameters against it as if it's a table. You get some very interesting usage patterns at that point.
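In other words, something like the following hypothetical query shape, where the regular fields, filter, and sort parameters descend into a JSON column exactly as they would into a relation (metadata is an illustrative field name; none of this is a shipped API):

```ts
// Hypothetical: metadata is a JSON column, treated as a nested "table".
const query = {
  fields: ["title", "metadata.author.name"], // descend into the JSON value
  filter: { "metadata.author.name": { _eq: "Rijk" } },
  sort: ["metadata.author.name"],
};
```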
Speaker 1: Could you consider using a JSON Schema to type and validate the field? Absolutely. There's a very interesting thing, though, when it comes to JSON and typing slash validating, which is that a lot of people will sort of accidentally start using a document database as a relational database, thereby completely defeating the point of using a document database. And this has been the main, if not the only, argument we've had against supporting MongoDB in the past, because it has come up before; there have been a couple of feature requests every now and again. But it was always the question of "oh, let's just use MongoDB instead of Postgres", and that really makes no sense, pardon my French. It makes no sense if you just think about it for real. Because a document data store has a lot of perks, there are a lot of good things about it, but using it as a relational database is just not what it was designed to do best. You're kinda forcing it to do something it wasn't made for, at which point: just use a SQL database. If you wanna have relational data in a tabular format, use a SQL database. That's what they excel at. That's what they're best at. That being said, it's the overlap of the two where it gets really interesting: things with rich content, like views in an app or pages on a website, where you have fixed metadata points that exist for everything. You have a title, you have an author, you have a published date, you have some of those pieces, like a status. But then you also have rich content, which is gonna be a semi-flexible schema. It's when you mix the two that I think it gets really, really interesting. But that does also mean that you need to be able to manage and search through and extract the unstructured part, with a semi-known schema. That's where it gets tricky. So, what made me think about that is the question: consider using a JSON Schema to validate the fields. If you're strictly validating against one schema of JSON objects, at that point are you better off using JSON, or should you just make a couple of columns? Because a many-to-one field with a related table with structured columns will most likely perform better than trying to do it with nested objects, assuming that there's gonna be searching and filtering and organizing involved. Another question from the chat: Postgres allows for date ranges, which is effectively a JSON array; somebody built a custom date range interface for this but cannot filter against it. Is that something that would be supported by this PR, or would that be something else? Yep, that's basically exactly it: you have some arbitrary JSON value, and you need to be able to manage and search through it. "In a document database, for the record, each record in a collection can have a significantly different content structure." Yes, that is a very good point, which is why the JSON Schema again becomes interesting. Because if you have a column that you want to have the same schema for every single row in your table, then relying on a JSON Schema for validation makes a lot of sense, because you want every one of those to be the same. But at that point, what becomes the benefit of using JSON over just columns? Those are the sort of questions we need to ask ourselves. Because the nice thing about JSON is that anything could go in there; that's kind of the point. It's an unstructured data type. So what if you want everything to be different? Or maybe you want it to be one of five known schemas, instead of just the same fixed object. I guess with a JSON Schema you have union types, if I'm not mistaken. Food for thought. It's an interesting one. But it does raise a different question: what about validation? Because we have validation rules for regular columns now, where you can say a number needs to be greater than something. For JSON fields specifically, we need to come up with something. Using a JSON Schema could be very interesting. Maybe a JSON Schema for the whole record, instead of just the JSON field, could be interesting. Maybe both. Maybe it's nested.
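As a concrete example of that idea, a "one of several known schemas" rule can be expressed as a standard JSON Schema union and checked with the Ajv library. The schema, the block shapes, and the notion of wiring this to a Directus JSON field are all assumptions for illustration; nothing like this is built in today.

```ts
import Ajv from "ajv";

// A JSON column must hold one of two known block shapes (a oneOf "union").
const blockSchema = {
  type: "object",
  oneOf: [
    {
      properties: { type: { const: "text" }, body: { type: "string" } },
      required: ["type", "body"],
    },
    {
      properties: { type: { const: "image" }, src: { type: "string" } },
      required: ["type", "src"],
    },
  ],
};

const ajv = new Ajv();
const validateBlock = ajv.compile(blockSchema);

console.log(validateBlock({ type: "text", body: "Hello" })); // true
console.log(validateBlock({ type: "video" })); // false: matches neither shape
```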
Speaker 1: Here's another thing. Let's see, what were the other chat-related things? Somebody else asked: "I may have missed this, but is the idea to integrate this with Directus's existing filters, dynamic variables, etcetera, or would there be new JSON-specific functions?" That is a great question, and kind of the instigator of rehashing this chat. We had this feature request, and we were looking at it through the lens of "let's make a specific JSON function like we have the others, and just implement that". And then Tim went ahead and did it. Kudos, a tremendous amount of effort. But, and it's kinda what we talked about with the data abstraction piece just now, we started to realize that it might be way more powerful to treat it as an item rather than as a value, so you unlock all of the other query parameters against the value. I hope that answers it. It gets a little bit theoretical quick here.

Speaker 0: Soon as you use...

Speaker 1: "...it relationally, it becomes even harder to maintain." Good point. "Just started using MongoDB as a replacement for Redis for a project that doesn't use Directus; they're crypto, so their relational data is based on wallets." Makes sense. If you think about that use case, you have semi-structured data: you know roughly what goes into a wallet, but there are oftentimes a lot of optional fields, depending on the implementation, of course. But one thing you do know is that you have a fixed primary key that you can use for some relationships. "Big supporter of JSON Schema. Everywhere. Just for the spec. Simpler. TypeBox for the win." That used to be my go-to, and then a lot of others on my team started using Zod, and I've kind of been converted.

Speaker 2: The inference is pretty...

Speaker 1: Let's see... "You kinda wanna know that a list of tags is always a list of strings, or that an object matches a specific schema." Depends. I'd say generally yes, but it also really depends. To give a system thing as an example: for interface options, we don't know ahead of time what's going in there. It's up to the interface to figure that out. It's basically just a blank store for an interface to keep some sort of options in, and it really doesn't matter what the format is, what's in it, etcetera. Somebody shared pg_jsonschema, schema validation for Postgres specifically. That's where it gets real tricky: that solution relies on a different filter syntax, a JSON path, similar to XPath. Yep, exactly. I think the JSON Schema example is a good example of the added complexity that we're putting ourselves in here, on purpose, which is that we really, really aggressively want this to be a standard in the API that you can use no matter what the data source is, and then it just depends on the data source how performant it becomes. I'd really, really like to prevent ending up with a long table in the docs that says "can you use JSON filtering? Yes. No. No. No. Yes. Yes..." for the different drivers, especially if those drivers are third-party maintained. That's gonna be incredibly annoying.
It does mean that for some of the data sources we'll definitely have to, what's the right word, add fallback logic, and do a lot of that magic on the Directus side of things rather than on the data store. But so be it. I think that's a trade-off worth having, as long as the drivers can handle it themselves.

Speaker 2: We're fine. Go ahead. Sorry, the delay. No, it was just Brian's message, which was: "the discussion points my mind towards use cases like page builders, where I wouldn't need to create a ton of separate tables." That's true, and this is exactly the use case where I, in the past, have used it. Sometimes you very quickly want to throw together a component, but you really don't wanna create different relationships and whatever. So you just quickly make, like, an href with a label and slap it together as JSON, for example. That's a very nice use case for that. Very quick, very easy.

Speaker 1: Mhmm. For sure.

Speaker 2: I agree.

Speaker 1: Another interesting thing he's mentioning there is that it would be better served by a NoSQL database. That's where the interesting discussion really starts. Because if you're talking about a page builder for a website, then for the pages themselves, what is the route, what is the title, what is some of the metadata, I'd argue that's probably better in a SQL database. You're talking about structured data that you wanna query on a column-by-column basis; you wanna say "get all articles from January 2024", for example. That's gonna be faster in a relational database, because it has structured data with known types, and you can search through it efficiently. But the content part of a page? Absolutely, it makes a lot of sense to do that as a page builder, because you have semi-structured data: an array with zero or more blocks of a known type. So that's where that JSON validation with a schema potentially comes in. The data is semi-structured, so you have objects of x different types, and then an array of those things. That makes a ton of sense. But I think the real power is the combination of the two. Because if you were to try to do all of those pages in MongoDB, now you trade off the filtering performance and the searchability aspect of it. But on the SQL side right now, you have the penalty of not having that unstructuredness of the data. So right now we'd rely on any-to-anys for that type of use case, which has its own benefits around searchability and joinability. But if your pages don't really have to reuse sections, for example, then yeah, I agree: doing it as a JSON object for a page builder makes a lot of sense. But implementing it in a way where you get both is really the...
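A sketch of the hybrid shape being described, with illustrative field names: fixed, queryable metadata as ordinary relational columns, and the page-builder content as one JSON column of loosely typed blocks.

```ts
// Illustrative only; not a prescribed Directus data model.
interface Page {
  id: number;            // relational primary key
  title: string;         // structured columns: fast to filter and sort on
  author_id: number;     // a real foreign key into an authors table
  published_at: string;  // e.g. "2024-01-11"
  status: "draft" | "published";
  // The page-builder part: an ordered array of semi-structured blocks,
  // where each block's shape depends on its type.
  content: Array<{ type: string; [option: string]: unknown }>;
}
```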
Speaker 2: Oh, this is why Brian, the father of AgencyOS, he probably felt that pain, so he came up with this. But I agree wholeheartedly. Like, this came up exactly when I wanted to do that. If you have one-off components that don't really have anything to do with other stuff, and you just want to get some data in, it's very, very neat. The repeater interface is very useful for that.

Speaker 1: Now there's one additional topic that we haven't touched on, which I think we could easily spend 15 minutes on, and Brian is hinting at it right now, actually: relationships. Relationships is where it gets real tricky. Because right now, we know that article.author is a foreign key to the authors table. That metadata is known; we know it ahead of time. So we can do things like "give me everything, nest everything": star dot star, whatever. And it knows what to nest because it knows what the relationships are. Within a JSON document, or an array of JSON documents, there's really no guarantee that something is an ID, and there's no real knowledge of what a path is and where it points. So once you want to start nesting data like that, it becomes tricky, tricky, tricky. Within a MongoDB context, if I'm not mistaken, a MongoDB ID is always globally unique. So as soon as you have nested data like that... what did they call it again? There was a special Mongo ID type. ObjectId, I think, is what they call the type. Anyways, when it encounters a value of type ObjectId, it can assume that it's a globally unique ID, and it therefore knows what document to fetch. But in a sort of hybrid model, you don't necessarily have that luxury. Because you can have a nested categories array in your JSON object that just says 1, 2, 3, with the assumption that you're talking about rows 1, 2, 3 in your categories table. You're mixing concepts, because there's no such thing as a globally unique ID in a relational database.

Speaker 2: Kind of by design. I'm already imagining some type of custom format with, like, a dollar sign, directus-dot-something, and then we have to translate it. And, oh no, it's a whole repertoire. We could do this.

Speaker 1: The same way as the chat, apparently; somebody is saying Directus could prefix it.

Speaker 2: That's what I meant. Oh, yeah.

Speaker 1: We could. But it does mean that we would have to make the data in your tables proprietary, which is something I'm personally not a fan of. The system tables are one thing, because of course you need to have some metadata saved somewhere, and we need a place to store that. But I really, really badly, desperately do not want to introduce any sort of Directus-specific proprietary nonsense into your own database, into the user tables rather. So that's a tricky one.

Speaker 2: Right. And if we did that, then we would probably also have to provide a way to extract the data, and it's more and more patches on top of patches. I don't think we wanna go down that road.

Speaker 1: The question really becomes: how do you pull together a join? And it's the same reason we haven't really... because Brian said "relational repeater", right?
The reason why our repeater interface currently is not relational is this exact reason: how do we store the information of what field is relational to what? And the other tricky thing is that, without some real clever engineering from John and Nicklaus, most likely, there's no guaranteed way you can really do a join based on a nested JSON value. Some databases would, some databases don't; it's the same sort of story. So it becomes a performance nightmare. But what are some of our options there? Just thinking out loud. One would be that we have some sort of, kinda like, directus_relations setup, where we say: if your JSON object has a path called, I don't know, article.author, we assume it's a foreign key. But you can't really guarantee that. An additional option would be to allow the end user to pass the join information manually through the API, where instead of saying author.name, where we know that author is a relationship, maybe there's some sort of syntax where you can say "author joins on table dot column", dot name. I don't know. So you can pass it yourself. Can we just say we don't allow relations in JSON? I've tried that in the past, and I think the answer is no. I'm afraid the answer is no for that, because that's kind of the situation we're in now. Like, we have a repeater, and people continuously ask, "can we make it relational?", and I continuously answer, "how is that any different from a one-to-many?", and then oftentimes we end up in a sort of stalemate, which is: well, it's not JSON, but it does do what it needed to do, so we're good enough. But I think it's fair to say that one of the design requirements of the system would be that we need some sort of way to relate data from a key that is inside of a JSON document.
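The two options just floated, sketched as data shapes. Both are hypothetical designs, not anything that exists: the first registers a JSON path as relational metadata up front, in the spirit of the directus_relations system table; the second has the caller pass the join target inline in the query.

```ts
// Option 1 (hypothetical): declare ahead of time that a JSON path holds
// keys into another collection. All property names here are invented.
const jsonRelation = {
  collection: "articles",
  field: "content",
  json_path: "$.author",
  related_collection: "authors",
};

// Option 2 (hypothetical): pass the join information inline, e.g. some
// syntax meaning "content.author joins on authors.id, then select name".
const fields = ["title", "content.author(authors.id).name"];
```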
Speaker 2: Yeah, I assume we would have to keep track of it.

Speaker 0: We've talked about this for Editor.js, right? And/or the WYSIWYG: being able to have real relational data stored inside there, where there's actually a relationship to another...

Speaker 2: Right.

Speaker 0: ...object, right? Another Directus item. So we are thinking about this; we have some ideas around it. But I think it's separate from this particular request. I think right now, this request is primarily around filtering and searching and finding.

Speaker 1: That's true. Because, for the record, this really quickly turned into "how to properly do JSON in Directus" rather than "how to filter through an object". All very much connected, you could say.

Speaker 0: Still important. It's part of the divergence, then coming back to convergence. But as you guys have continued to talk about this, I think there is likely a separate kind of feature for the relational piece, versus the ability to filter and just find stuff inside these objects, and do it performantly.

Speaker 1: Yeah. The WYSIWYG one is an interesting example. That's a good point, because everything that we've been discussing so far is basically gonna be the underlying engine slash foundational stuff for that type of use case. In an Editor.js type of environment, kinda like in a Notion doc, for those unaware of Editor.js: each line of text is its own block, effectively, and you have blocks of different types. But then a block could be a relationship elsewhere, which is a great example of this. Like, how do you know what that relationship is? I'm pretty sure that in a Notion-like environment, or an Editor.js type environment where you have semi-structured data, part of the solution there is that the join information could be in the block, where you say: you have an image block, and because you have an image block, you now know to look at images for looking up the related value. But, again, that depends on the schema that's in your JSON field. And if we were to make that a requirement... oh god. Way to go, Brian. If we're saying that that has to be in the JSON field, now we're making the format proprietary again, because now we're requiring your JSON object to adhere to a certain spec we designed, just to be able to do relationships. Tricky. For a second, I thought you were gonna write down "this will be tricky". Rest assured: this will be very difficult. "Is there not a normal standard for that already?" You'd hope so, but the answer is no.
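A sketch of that block-based idea, with an Editor.js-style shape (illustrative only): the block's type is what tells the system which collection to resolve the nested key against.

```ts
// Hypothetical block data stored in a JSON column. Because the second
// block's type is "image", a system could know to resolve `file` against a
// files collection; nothing enforces this convention, which is the problem.
const blocks = [
  { type: "paragraph", data: { text: "Hello world" } },
  { type: "image", data: { file: "a1b2c3" } }, // key into a known collection
];
```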
Speaker 0: You know, there are some other CMS vendors that do this, but, again, they've got more, I think, generally well-defined, hardened structures. The ability to be agnostic is where this becomes very difficult.

Speaker 1: Yeah. No. That's a great point.

Speaker 0: They get to say "this is the data model you must use; therefore, I can rely on the fact that my structures are gonna match." But I think, potentially, with some of the discussion we had on the schema piece as part of this whole conversation: if you were able to say "I am predefining a schema, I know what that is", and then have enforcement, validation, and the understanding that it's there... The agnosticity is: you define the schema, or we define a schema, but it's generic in the sense that you're not required to have it. You can have a JSON field that's unstructured and doesn't have a schema. But if you choose to implement one, then you actually get the benefits: now I can, quote, guarantee, as much as you can guarantee anything in a JSON world, that I know what my data structure is and how to interact with it.

Speaker 2: Yeah. To be frank, this is just very, very difficult. I mean, we could, you know, just willy-nilly keep track of some keys in a new table that says "hey, this relates to that", similar to relations as we do them right now, and try to keep that in sync. It's, well...

Speaker 1: So when you're talking about the other-vendor use case, I think for the majority there, it's basically built around the idea that you have a globally unique ID. So it's a very document-database-first approach. We use Notion a lot, right? So if you ever create a Notion page that links to another Notion page, that link is just a globally unique ID. You can nest it however you want, and it knows, because everything is a page at all times and every page has a globally unique ID. So that relationship lookup kind of solves itself, because you don't need to know what collection or table to look at to find the item; it's all globally unique. The difficulty here lies in that hybrid model, where you want to nest a relational record inside of a document, which itself might be related from a relational row. Oh, do we like to make things difficult? But that's why it's cool. Let's see. "There is a Portable Text spec." Absolutely. And again, I think it would be great to have some sort of Portable Text interface, or an extension that handles that specific format. But it would be a shame if everything we're talking about only worked if you're in a specific format. Because who are we to say what the right format is?

Speaker 2: I agree. I just looked at the Portable Text one. It looks very similar to how everybody else does it: you have some things, you define your type, and then you have some keys depending on that type. Great.

Speaker 1: Cool. Alright. We're quickly reaching the top of the hour here. It was a good idea to only focus on one topic for these sessions; just based on prior experience, it always turns into "this should be quick" and then you talk about it for an hour. Just to give the chat a chance: any other thoughts, questions, concerns, ideas, shout-outs? If you wanna say hi to the family at home, this is the time.

Speaker 2: Or when they start typing...

Speaker 1: Everyone is typing. "Config as code." Uh-oh. I'm pretty sure we have a separate one of these dedicated to that coming up. "Ship it."

Speaker 2: Ship it? I'm not gonna say it right now.

Speaker 1: Let's go. Same. Yeah. The next one's gonna be Config as Code.

Speaker 2: Cool. Cool. Cool.

Speaker 1: Cool. So make sure to come back for that one. Well, if there are no other questions, thoughts, feelings, or concerns, let's wrap this one up. Brian just said the conversation about config as code is in two weeks, not the release. I think this is a very, very, very important...

Speaker 0: Thank you, Brian.

Speaker 1: Thank you, Brian.

Speaker 2: And it's imported immediately. Go now. What have we done?

Speaker 1: Cool. Well, all that being said, everybody: thank you so much for joining us here live, and thank you so much for watching this at home if you're seeing it on Directus TV. For the people here live, if you don't know what that is, check it out: directus.io/tv. I think I got that right. It'll be up in about a week's time. But for now, I wanna say thank you all. Good luck and Godspeed, and we'll see you in two weeks.
We're excited for a new year. We're gonna try out a new well, we've been we've been playing around with this request for views format for a little while, but we're gonna we're gonna try out just covering a specific feature, talking through some details, making sure that we have a full specification on it, and getting additional community feedback, as needed. Today's topic will be JSON object filtering. It's been a it's a very, very popular request that we see quite frequently, and we would love to ensure that we solve this problem correctly and get it working for the databases that support it. I think one of the key issues that we've run into we've done a lot of development work on this, but what we run into is various database vendors support this differently. You fix it to support that thing. It breaks something else. We spent a lot of time kind of iterating back and forth on this feature, and I'll let Rai talk about some of that. But I mean, fundamentally, we we know that this is important. We consider it kind of a critical road map item, and that's the reason we're gonna talk in detail about it today. Absolutely. Well, thank you for that. Yeah. And as you can see, I was the one who opened this feature request conveniently with no details whatsoever. I basically just say, hey, man. I wanna be able to filter in JSON objects. Good luck. And then everybody was like, yep. Me too. However, you know, the this this was done way before we had that RFC format, so the the the details are lacking. Let's let's call it that. So before we dive in too deeply, you know, Daniel, I'm just gonna throw you under the bus here. You wanna walk us through what what even is JSON filtering in the first place? JSON filtering can be quite useful if you store JSON inside of your database, and you need to check some field inside of that JSON in your database. Like, we said in the beginning, right, not every database supports this as of right now, which forces us or will force us to, you know, do some little Directus magic as usual. But more and more databases kinda jump on the train, which is pretty neat. I think as what was it? SQLite very recently also announced that they have JSON b support, I think. I hope I'm not miss remembering. But, yeah, can be quite useful. If you have stored JSON, then you can filter on fields inside of that JSON. That's very useful. And I think as as Kevin points out, actually, a couple of our default interface configuration our field configurations actually store in JSON by default. You can change them in most cases, but and, currently, that's the recommendation. So off the support side of things currently, for those of you listening, if you use the CSV format instead, on the database field setting side, you can then filter through the application in the API on those fields. But JSON is a little bit nicer structured format, so the reason for the request. Yeah. And then the other the other big sort of elephant in the room there, of course, is that a lot of systems are utilizing and or could be utilizing more of a document style structure rather than, you know, a tabular style data structure. Especially, you know, when you have flexible schemas or unknown, you know, data structures ahead of time or semi semi structured data. Think about, you know, blocks on a page or something. You know, you're talking about sort of rich data that may or may not be structured like a table. 
So, therefore, you know, storing JSON in a Postgres database or another SQL database can give you some of that document magic without having to switch over completely to a document database. So there's a lot of a lot of benefits to having this. Now that being said, it also comes with a ton of complexities. So in my original feature request I was really thinking about it sort of as a filter against the data. Right? So similar to how we have, some functions to run against date values. I don't know if you've seen those before, but we have things like extract the year from a timestamp as a function. I was thinking about it the same way in my original feature request. Right? So I was thinking maybe we can have just like you would be able to do, like, year time stamps equals 2024, you could do something like, I don't know, Jason, name of the field, and then some sort of identifier string to select something from that field and then run filters against it, against that value that you've now selected. Right? Similarly you'd be able to use that in fields or in sort or other pieces like that And we actually put a lot of work in that already, and by we, I mean Tim who's in the chat, and I'm definitely gonna put him on stage and put him in the spotlight. Jonathan, if you wanna pull that up real quick, it might actually be fun to take a quick scroll through. Also, as a way to answer that, do all DBs even support it? Because there's a wonderful table at the top of that pool request. If you wanna pull up the, don't bring him up. We're recording. He's he's a little camera shy, but we can we can figure it out. You know the PR off the top of your head, or do I just need to go find it? Oh, just just open up pull requests, and if you search for Jason, it should be the only one that's there. GitHub pull requests. We've left it open intentionally. We'll we'll circle back to that later how we're handling now. But if you just probably all the way at the bottom is is a little older. Yep. There it is. Alright. There's your matrix. Right. So in this PR, we basically took a swing at implementing that, you know, JSON filtering the way it was described in that discussion. And by described, I mean, vaguely hinted at, because the description was a little poor. But we did implement it in that way, and it does actually work. So we do have, you know, queries for a little different databases. But as you can see in this table to the Chet's point earlier, not everything is supported everywhere, which becomes tricky immediately. Right? Because how do you then document it? Now it's gonna be database different. So there has been, you know, implemented some fallback support so you can see the difference in that green check versus the, that sort of Unicode check I guess. You you can see that for a lot of these things we're actually you know having to do some direct us magic to to make that work against the database which is not necessarily gonna be the most performant or the what's the right word? The the the quote, unquote right way to do it. Right? It's a blue check mark. Oh, if you're on Windows, just pretend. If you're on Linux or Linux, just Linux, just pretend. The check marks are green. Anyways, while building this, if you might wanna pull up the file section of this PR, Jonathan, I think it's a fun fun scroll through. This if we go all the way to the top, there's a tab. Files changed. 2000 editions. Oh my god. Yeah. There's there's only a 1,000 lines across 50 files. 
The long story short is that in the database helpers, if you see in that left-hand sidebar, we have to add all of the additional queries for JSON filtering ourselves, for all the different database types, sort of hardcoded in. You see them here in the dialects, if you just click one of those; it doesn't really matter which one. So all of those, you can see them here. We have a JSON_EXTRACT for MariaDB that may or may not exist across the other database vendors. Right? So it's not a SQL standard, which makes this a heck of a lot more complicated than you'd think, because every database does it differently. I believe it was Postgres that doesn't even rely on functions. It has, like, a special syntax with builders and arrows and whatnot, which is pretty interesting. I don't know if the Postgres 12 one has that in there. But anyways... oh, maybe that's the one with the question marks there. I don't know. There's all sorts of different syntaxes, is what I'm trying to say. Oh, here they are, the ones with the arrow and the dash-arrow-arrow. That kind of stuff. So what we realized with this initial work that we did on the JSON filtering is that, well, A: it's super complicated, as you can tell. B: there's a lot of additional work and a lot of additional logic, which in turn means more to maintain, as always, because the more stuff you add, the harder it becomes to maintain. Right? But the third thing is, we also started to wonder: okay, instead of having it as a function-style thing in the query parameter, what if we do it more like the fields parameter itself? Right? Where, like in GraphQL, for example, you could just provide a nested tree that you wanna select, instead of having to do it through a filter attribute. And at the same time, we've been working on a new data abstraction engine in the first place, which doesn't necessarily fix the fact that we have to do a lot of stuff ourselves for every database, because it's just database specific, but it does it in a way that is designed to have database-specific drivers rather than dialect-specific overrides. Right? So this is gonna be a bit more of a deep dive, but the way the API is set up right now is that everything effectively goes through Knex, right, the SQL query builder that basically everybody and their mom is using. And what that means is that first it just becomes a sort of generic SQL, and then at the very end it's translated for the individual vendors. Right? So, and this is a very crude explanation, it's effectively just doing a find-and-replace for the quotes, making sure it's the right quote style for the right database. And then for some of the databases there's a little bit of additional magic, like for SQLite, for an ALTER TABLE statement, there's some magic included there. But the long story short is that adding stuff like JSON filtering becomes tricky, because now it's database specific. Right? So we don't really have a way, and it was the same with the timestamp helpers that we did earlier, we don't really have a way to make that agnostic across all of the different database vendors. Right? The second part there is that we know that we wanna support more database vendors over time, not fewer. 
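To make the dialect mess concrete, here's a rough sketch of the same nested lookup across a few vendors. The articles table and metadata column are made up, but JSON_EXTRACT, JSON_VALUE, and the Postgres arrow operators are the real per-vendor mechanisms being referenced:

```ts
import knex from "knex";

// Hypothetical table "articles" with a JSON column "metadata".
// There is no SQL standard for this, so every dialect needs its own raw SQL.
const db = knex({ client: "pg", connection: process.env.DATABASE_URL });

// Postgres: arrow operators ("->" returns JSON, "->>" returns text).
const postgres = db("articles")
  .whereRaw(`metadata -> 'author' ->> 'age' = ?`, ["12"]);

// MySQL / MariaDB / SQLite: JSON_EXTRACT with a '$.path' selector.
const mysql = `SELECT * FROM articles WHERE JSON_EXTRACT(metadata, '$.author.age') = 12`;

// SQL Server: JSON_VALUE with a similar path syntax.
const mssql = `SELECT * FROM articles WHERE JSON_VALUE(metadata, '$.author.age') = '12'`;

console.log(postgres.toString(), mysql, mssql);
```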
So trying to do it in this make-it-generic-first, add-one-by-one-overrides-to-the-dialects way doesn't really scale anymore. Right? And the versions, a very good point, Tim. Because I think if we go back to the table, we already saw it: there are differences between MySQL 5 and MySQL 8 plus, although 5 is now end of life, so that's a whole different discussion. And the same for Postgres 10 versus 13 and up. So the way we're re-architecting that piece is by saying: there's still a singular data entry point, but rather than relying on SQL, it relies on an abstract syntax tree of our own design. Just a proprietary data format that explains to the engine what the data looks like that we wanna fetch. Right? And then for each of the different vendors, we're gonna have a driver that interprets that command and then just executes it in whatever way is appropriate for that driver. Right? So for a lot of the SQL drivers, we can still share a lot of that SQL magic like we're doing now. But it also means that we can start opening up the doors to other data sources, and to JSON-specific data sources. Right? So one of the reasons there, to keep going on that train of thought, is also that once we started getting super deep into JSON filtering, we started to realize: well, if we implement the JSON filtering as field selection rather than as filtering-specific functions, you end up with effectively just drivers for a document data store. Right? Which is very interesting. So with that in mind, we could also start thinking about what it looks like to use something like DynamoDB or MongoDB or some other sort of key-value slash unstructured document-style store with the Directus API. Importantly, not treating them as a relational database, but treating them as a document data store, like leaning into the flexibility of a document database rather than trying to force them into a relational structure, right, like we've seen in the past and have explicitly avoided. That was a very long train of thought. But what I'm trying to say is, what we're getting at with that is that we've effectively, not shelved, but we've sort of put a pause on this particular PR, and instead we're focusing now on implementing this sort of JSON selection support directly into this driver-based approach. Right? Because the one thing we didn't wanna do is add more complexity now, only to then replace it in a couple of months with another breaking change with a completely different structure. But we've left the PR open, because we definitely don't wanna lose any of the work or any of the code; it is being repurposed into the new structure. In the chat: they would all be called features, not bugs. That's very true. Any bug in SQL is a feature. Got a question about the specs for the drivers: I believe we do plan to fully spec and document the driver interfaces, right, for the new architecture? Absolutely. Absolutely. Yes. Yep. I mean, as of right now, it's still very much in R&D, so we're not opening the doors quite yet on building your own, but it is basically built with extensions in mind. Right? So we wanna make sure that those things are: do whatever you want. As long as you adhere to the spec, you can save and read data from wherever the f you want. 
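None of the new engine is public or final, so the names below are invented, but as a rough sketch of the AST-plus-drivers idea being described:

```ts
// Invented names throughout; a sketch of the architecture, not a real API.

// A (greatly simplified) abstract syntax tree describing a fetch,
// independent of any SQL dialect or storage engine.
interface AbstractQuery {
  collection: string;
  fields: string[];
  filter?: Record<string, unknown>;
}

// Each data store ships a driver that interprets the AST however is
// appropriate: compile to dialect SQL, or run a native document query.
interface DataDriver {
  query(ast: AbstractQuery): Promise<Record<string, unknown>[]>;
}

// A document-store driver can lean into native JSON querying, while a
// relational driver shares SQL generation with its siblings.
class SketchMongoDriver implements DataDriver {
  async query(ast: AbstractQuery) {
    // ...translate `ast` into a native document-store query and execute it...
    return [];
  }
}
```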
But, you know, we're definitely focusing on feature parity with what we have first, and then expanding the scope there with JSON filtering and some other additional relationship types. Yeah. I love the idea that we're moving towards an extension-driven driver approach. Right? The same way we've done with so many other components of the platform, making it extensible. And it also means that if you've got a custom data source, you've got an API, you've got other things: you'd have a spec and be able to build your own driver against some custom data source that you have, and be able to leverage the API and power of the Directus application on top of that. So very exciting. Very exciting 2024. Absolutely. Wait for the first person implementing the Excel sheet data store. I think this will be a smash hit in Germany. All of the companies love Excel so much. Maybe, maybe. The whole finance world rejoices. We should start that as a little competition: whoever builds the Excel data store first gets a shout-out on the website. Oh, you bet. A signed certificate of insanity, by me. Love that. The worst thing, the very worst thing, is that people will actually, honestly, use it, I'm afraid. It's not the using-it part that worries me. It's the relying-on-it part that worries me. Anyhoo, circling back to more of the specifics of JSON filtering as a whole. Right? So just to circle back to a requirements list: we don't have an RFC for JSON filtering proper, and we don't really have an RFC for what it could look like in new formats. Right? We do know that we want to support it from a field-selection perspective, with that sort of nonstructured data store in mind. We do know that we have to support it as part of filters and sorting and querying and all of that good stuff. That being said, there is a very interesting difference between JSON objects, which is sort of the assumed default that we've been talking about here, and arrays, which is where it gets real complicated real quick. Because one of the main use cases where this is coming from is fields like tags, right, where you just have a JSON list of individual strings for tags, and then how do you search through those? Right? But now you're not so much talking about making a nested selection of a JSON path and then filtering against that. Now you have to search through each item of the array. Right? So we do have some specific magic going on for one-to-manys right now, like _some and _none for example, to say I want all of the values in my related table to match XYZ. But we should probably add something along those lines for JSON when it comes to filtering specifically, I think. This is mostly, again, for those who've joined us in the past on these live sessions, and for Rich, welcome back: the goal is oftentimes to really, really diverge, to find the boundaries of what we can or eventually maybe want to do with this, and then converge back into what is realistic and what that first MVP looks like. This is really that divergent thinking stage. Right? It's like, how far do we need to go when it comes to filtering on stored values? For example, do you need to be able to say things like: I want all of the JSON objects in an array to have a nested property author.age, and they all need to be bigger than 12? Right? 
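For comparison, here's the existing relational operator next to what a JSON-array counterpart might look like. The _some/_none filter on a one-to-many relation is real, as described above; the JSON-array version is purely speculative, mirroring the author.age example:

```ts
// Real: relational one-to-many filtering with _some / _none.
// "Give me articles where some related comment is flagged."
const relationalFilter = {
  comments: { _some: { flagged: { _eq: true } } },
};

// Speculative: the same idea pointed at a JSON array column. If "blocks"
// is a JSON array of objects, this would express "no object may have an
// author.age of 12 or less", i.e. all must be bigger than 12. Not implemented.
const jsonArrayFilter = {
  blocks: { _none: { "author.age": { _lte: 12 } } },
};
```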
I've actually never used, like, field-type querying, like in Postgres or in the others. So I'm curious: is there a function of the database that tells you whether or not the stored value is an array? Like, is there some database-layer check that tells you that? Is that possible? I think the realistic answer is that we can't assume there is. (Good plan. Good plan.) We're talking about various different database vendors, and we're talking about various SQL-like vendors that are sort of SQL-inspired but not fully compliant. Think of, I wanna say PlanetScale, but they recently added foreign keys, so I think they might now be more compliant than they used to be. But those types of vendors, right, where they use the SQL syntax for basic querying, but they don't have a full SQL engine behind it, because they just implemented the data store differently. So the answer is: we don't know, nor can we know. Because we need to build this and design the specs for this in a way that is agnostic to the data store. Right? Tim was mentioning we have three different pieces of functionality: extracting the data in fields, extracting the data from a field and then using it to filter, and then the deep filtering against stuff within that JSON blob. Filtering inside the J... oh, yeah, filtering inside of an array with deep. Yeah. So I think, ideally, and this is a bit of a different way of thinking about how we treat the API, because it used to be very tabular-data-first: I think what we're leaning towards now is really treating JSON values as if they're just any other table, effectively. Right? So we just treat an object as if it's any other item in the database. We treat an array as if it's any other table in the database. And therefore we just allow you to use any of the regular query parameters against it, as if it's a table. And you get some very interesting usage patterns at that point. Right? Could you consider using JSON Schema to type and validate the field? Absolutely. There's a very interesting thing, though, when it comes to JSON and typing slash validating, which is that a lot of people will sort of accidentally start using a document database as a relational database, therefore completely defeating the point of using a document database. Right? And this has been the main, if not the only, argument that we've had against supporting MongoDB in the past, because it has come up before. There have been a couple of feature requests every now and again. But it was always the question of: oh, let's just use MongoDB instead of Postgres. And that really makes no sense, pardon my French. It makes no sense if you just think about it for real. Right? Because a document data store has a lot of perks; there are a lot of good things about it. But using it as a relational database is just not what it was designed to do best. So you're kinda just forcing it to do something it wasn't made for. At which point: just use a SQL database. Right? If you wanna have relational data in a tabular format, use a SQL database. That's what they excel at. That's what they're best at. That being said, it's the overlap of the two, you know,
and so the overlap of the two is where it gets really interesting. Where, if you're talking about things with rich content, like views in an app or pages on a website or something, right, you have fixed metadata points that you have for everything: you have a title, you have an author, you have a published date, you have some of those pieces, like a status. But then you also have rich content, which is gonna be a semi-flexible schema. Right? It's when you mix the two that I think it gets really, really interesting. But that does also mean that you need to be able to manage and search through and extract the nonstructured part with a semi-known schema. That's where it gets tricky. Right? So what made me think about that is the question: consider using a JSON Schema to validate the fields. If you're strictly validating against one schema of JSON objects, at that point, are you better off using JSON, or should you just make a couple of columns? Right? Because a many-to-one field with a related table with structured columns will most likely perform better than trying to do it with nested objects, assuming that there's gonna be searching and filtering and organizing involved. Another question from the chat here was: Postgres allows for date ranges, which is effectively a JSON array. Somebody built a custom date range interface for this but cannot filter against it. The question is, is that something that will be supported by this PR, or would that be something else? Yep, that's basically exactly it: you have some sort of arbitrary JSON value, and you need to be able to manage and search through it. It is a table of documents, for the record, though: each record in a collection can have a significantly different content structure. Yes, that is a very good point, which is why the JSON Schema again becomes interesting. Right? Because if you have a column that you want to have the same schema for every single row in your table, then relying on a JSON Schema for validation makes a lot of sense, because you want every one of those to be the same. At that point, though, what becomes the benefit of using JSON over just columns? Right? Those are sort of the questions that we need to ask ourselves. Because the nice thing about the JSON thing is that anything could go in there. That's kind of the point. Right? It's an unstructured data type. So what if you want everything to be different? Or maybe you want it to be one of five known different schemas, right, instead of just the same fixed object. I guess with a JSON Schema you have union types, if I'm not mistaken. But food for thought. It's an interesting one. But it does raise a different question that we need to write down: what about validation? Right? Because we have validation rules for regular columns now, where you can say a number needs to be greater than something. For JSON fields specifically, we need to come up with something. Right? Using a JSON Schema could be very interesting. Maybe a JSON Schema for the whole record, instead of just the JSON field, could be interesting. Maybe both. Maybe it's nested. 
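As a feasibility side note: validating a JSON field against a JSON Schema is already routine at the application layer. A minimal sketch with the Ajv library, using the date-range field from the chat question above as the example schema:

```ts
import Ajv from "ajv";
import addFormats from "ajv-formats";

// Example schema for a hypothetical "date_range" JSON field:
// exactly two ISO date strings.
const dateRangeSchema = {
  type: "array",
  items: { type: "string", format: "date" },
  minItems: 2,
  maxItems: 2,
};

const ajv = new Ajv();
addFormats(ajv); // registers "date" and the other string formats

const validate = ajv.compile(dateRangeSchema);

console.log(validate(["2024-01-11", "2024-01-25"])); // true
console.log(validate(["2024-01-11"])); // false: only one element
```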
Here's another thing. Let's see, what were the other chat-related things. Somebody else asked: I might have missed this, but is the idea to integrate with Directus's existing filters, dynamic variables, etcetera, or would there be new JSON-specific functions? So that is a great question, and kind of the instigator of rehashing this chat. Because when we had this feature request, we were sort of looking at it through the lens of: let's make a specific JSON function like we have the others, and just implement that. Right? And then Tim went ahead and did it. Kudos. A tremendous amount of effort. But, and it's kinda what we talked about with the data abstraction piece just now, we started to realize that it might be way more powerful to try to treat it as an item rather than treat it as a value, so you unlock all of the other query parameters against the value (see the sketch after this paragraph). That answers it, I hope. It gets a little bit theoretical quick here. "As soon as you use it relationally, it becomes even harder to maintain." Good point. Good point. "Just started using MongoDB as a replacement for Redis for a project that doesn't use Directus. They are crypto, so their relational data is based on wallets." Makes sense. If you think about that use case, you have semi-structured data: you know roughly what goes into a wallet, but there are often a lot of optional fields. Depends on the implementation, of course. But one thing you do know is that you have a fixed primary key that you can use for some relationships. "Big supporter of JSON Schema. Everywhere. Just for the spec." "Simpler: TypeBox for the win." That used to be my go-to, and then a lot of others on my team started using Zod, and I kind of have been converted. The inference is pretty... ah, it's not showing here. Let's see what... oh, it disappeared. Where is it? Oh: "You kinda wanna know that a list of tags is always a list of strings, or that an object matches a specific schema." Depends. I'd say generally yes, but it also really depends. For example, to give a system thing as an example, right: for interface options, we don't know ahead of time what's going in there. It's up to the interface to figure that out. It's just anything. It's basically a blank store for an interface to keep some sort of options in, and it really doesn't matter what the format is, what's in it, etcetera. Somebody shared pg_jsonschema, schema validation for Postgres specifically. That's where it gets real tricky: that solution relies on a different filter syntax, JSONPath, kind of like XPath for JSON. Yeah. Yep. Exactly. I think the JSON Schema example is a good example of the added complexity that we're putting ourselves in here on purpose, which is that we really, really aggressively want this to be a standard in the API that you can use no matter what the data source is. And then it just depends on the data source how performant it becomes. Right? But I'd really, really like to prevent ending up with a long table in the docs that says: can you use JSON filtering? Yes. No. No. No. Yes. Yes. Yes. No. For the different drivers, especially if those drivers are third-party maintained, that's gonna be incredibly annoying. It does mean that for some of the data sources we'll definitely have to, what's the right word, add fallback logic and do a lot of that magic on the Directus side of things rather than on the data store. But so be it. You know? 
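To make that treat-it-as-an-item idea concrete, this is roughly the query shape being imagined: the same parameters you would aim at a related table, walking into a JSON column instead. The nested metadata paths are speculative syntax, not something the API supports today:

```ts
// Speculative: querying into a JSON column ("metadata") with the exact
// same query parameters you would use against a real relation.
const query = {
  fields: ["title", "metadata.author.name"], // select into the JSON object
  filter: { "metadata.author.age": { _gt: 12 } }, // filter on a nested value
  sort: ["metadata.published_on"], // sort by a nested value
};

// The appeal: one API shape, whether "metadata.author" is a real
// relation or just a path inside stored JSON. The cost: for stores
// without native JSON querying, Directus would fall back to doing this
// work itself, which is the performance trade-off discussed above.
```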
I think that is a trade-off worth having, as long as these drivers can handle it themselves. Because... yeah. Right. We're fine in that scenario. Go ahead. Sorry, the delay. No, it was just for Brian's message, which was: the discussion pointed my mind towards use cases like page builders, where I wouldn't need to create a ton of separate tables. That's true. And this is exactly the use case where I, in the past, have used it. Sometimes, if you very quickly want to throw together a component, but you really don't wanna create different relationships and whatever, you just quickly make, like, an href with a label and slap it together as JSON. Very quickly, you know. That's a very nice use case for that. Very quick, very easy. Mhmm. For sure. And I agree. Another interesting thing he's mentioning there is that it would be better served by a NoSQL database. Right? That's where the interesting discussion really starts. Because if you're talking about a page builder for a website, then the pages themselves, what is the route, what is the title, what is some of the metadata, I'd argue that's probably better in a SQL database. Right? Because you're talking about structured data that you wanna query through on a column-by-column basis. You wanna say: get all articles from January 2024, for example. That is gonna be faster in a relational database, because it has structured data with known types, and you can search through it efficiently. But the content part of a page, absolutely, it makes a lot of sense to have that as a page builder. Right? Because you have semi-structured data: you have an array with one or more, or zero or more, I guess, blocks of a known type. So that's where that JSON validation with a schema comes in, potentially. The data is semi-structured, so you have objects of X different types, and then an array of those things. Yeah. So that makes a ton of sense. But I think the real power is the combination of the two. Because if you were to try to do all of those pages in MongoDB, now you have the trade-off on the filtering performance and the searchability aspect of it. But on the SQL side right now, you have the penalty of not having that unstructuredness in the data. Right? So right now, we would rely on many-to-anys for that type of use case, which has its own benefits around searchability and joinability. But if you have pages where you don't really have to reuse sections, for example, then yeah, I agree: doing it as a JSON object for a page builder makes a lot of sense. Right? But implementing it in a way where you get both is really the goal. Oh, this is why Brian, the father of AgencyOS... he felt that pain, probably, so he came up with this. Yeah. But I agree wholeheartedly. This came up exactly when I wanted to do that. If you have one-off components that don't really have anything to do with other stuff, or you want to get some data in, it's very, very neat. The repeater interface is very useful for that. Now, there's one additional topic that we haven't touched on that I think we can easily spend 15 minutes on. And Brian is hinting at it right now, actually: relationships. Relationships is where it gets real tricky. Right? 
Because right now, we know that article.author is a foreign key to the authors table. Right? So that metadata is known. We know that ahead of time. So therefore, we can do things like: give me everything, nest everything, star-dot-star-dot-whatever. And it knows what to nest, because it knows what the relationships are. Right? When you're within a JSON document or an array of JSON documents, there's really no guarantee that something is an ID, and there's no real knowledge of what that path is and where it points. Right? So once you want to start nesting data like that, it becomes tricky, tricky, tricky. Right? Within a MongoDB context, if I'm not mistaken, a MongoDB ID is always globally unique. Right? So as soon as you have nested data like that... what did they call it again? There was, like, a Mongo ID special type. I forgot what it's called. ObjectId is, I think, what they call the type. Anyways, when it encounters a value of type ObjectId, it can assume that it's a globally unique ID and therefore knows what document to fetch. But in a sort of hybrid model, you don't necessarily have that luxury. Right? Because you can have a nested categories array in your JSON object that just says 1, 2, 3, with the assumption that you're talking about rows 1, 2, 3 in your categories table. Right? Because you're mixing concepts, because there's no such thing as globally unique IDs in a relational database. Kind of by design. I'm already imagining some type of custom format with, like, a dollar sign, directus-dot-something, and then we have to translate it. And... oh no, it's a whole repertoire. We could do this. The same thought as the chat, apparently. Somebody is saying Directus could prefix it. That's what I meant. Oh yeah. We could. We could. But it does mean that we would have to make the data in your tables proprietary, which is something that I'm personally not a fan of. Right? The system tables are one thing, because of course you need to have some metadata saved somewhere, and we need a place to store that. But I really, really badly, desperately do not want to introduce any sort of Directus-specific proprietary nonsense into your own database, in the user tables rather. So that's a tricky one. Right? If we would do that, then we would probably also have to provide a way to extract the data, and it's more and more patches on top of patches. I don't think we wanna go down that road. The question really becomes: how do you pull together a join? Right? And it's the same reason we haven't really... because Brian said "relational repeater", right? The reason why our repeater interface currently is not relational is this exact reason. It's like, how do we store the information of what field is relational to what? And the other tricky thing is, without some real clever engineering from John and Nicklaus, most likely, there's no guaranteed way you can really do a join based on a nested JSON value. Some databases would, some databases don't. It's the same sort of story. Right? So it becomes a performance nightmare. But what are some of our options there? Right? Just thinking out loud. One would be that we have some sort of, kinda like, directus_relations for this. Right? 
Where we're saying: well, if your JSON object has a path called, I don't know, article.author, we're assuming it's a foreign key. But you can't really guarantee that. Right? An additional option would be to allow the end user to pass the join information manually through the API, right, where instead of saying author.name, where we know that author is a relationship, maybe there's some sort of syntax where you can just say author joins on table-dot-column, dot name. I don't know. Right? So you can pass it yourself. Can we just say we don't allow relations in JSON? I've tried that in the past, and I think the answer is no. No, I'm afraid the answer is no for that. Because that's kind of the situation we're in now. We have a repeater, and people continuously ask: can we make it relational? And I continuously answer: how is that any different from a one-to-many? And then oftentimes we're just sort of in a stalemate, which is: well, it's not JSON, but it does do what it needed to do, so we're good enough. Right? But I think it's fair to say that one of the design requirements of the system would be that we need some sort of way to relate data from a key that is inside of a JSON document. Yeah. I assume we would have to keep track of it. We've talked about this for Editor.js, right, and/or the WYSIWYG: being able to have relational data structures stored inside there, where there's actually a relationship from this object to another Directus item. Right? So we are thinking about this. We have some ideas around this. I think it's separate from this particular request. I think right now, this request is primarily around filtering and searching and finding. That's true. Yeah. Because, for the record, this really quickly turned into how to properly do JSON in Directus rather than how to filter through an object. All very much connected. Still important. It's part of the divergence, then the coming back to convergence. But, just as you guys have continued to talk about this, I think there is a separate kind of feature function, likely, for the relational piece versus the ability to filter and just find stuff inside these objects, and do it really performantly. Yeah. The WYSIWYG one is an interesting example. That's a good point, because everything that we've been discussing so far is basically gonna be the underlying engine slash foundational stuff for that type of use case. Right? In an Editor.js type of environment, each line of text, kinda like in a Notion doc, for those unaware of Editor.js, each line of text is its own block effectively, and you have blocks of different types. But then a block could be a relationship elsewhere. Right? Which is a great example of this. Like, how do you know what that relationship is? I'm pretty sure in a Notion-like environment, or an Editor.js type environment where you have semi-structured data, part of the solution there is that that join information could be in the block, right, where you say you have an image block. And because you have an image block, you now know to look at images for looking up the related value. Right? But, again, that depends on the schema that is in your JSON field. 
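A sketch of the pattern being described, where the block type is what carries the join information; the shape and the images collection are illustrative, not a spec:

```ts
// Illustrative Editor.js-style content stored in a JSON field. The
// convention under discussion: an "image" block's id is understood to
// point at a row in a (hypothetical) "images" collection, so the block
// type itself is the join information.
const content = {
  blocks: [
    { type: "paragraph", data: { text: "Hello world" } },
    { type: "image", data: { id: 42 } }, // 42 = primary key in "images"
    { type: "paragraph", data: { text: "More text" } },
  ],
};

// The catch raised above: nothing in the raw JSON enforces this
// convention, and requiring it would make user data Directus-specific.
```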
And if we were to make that a requirement... oh god, way to go, Brian... if we're saying that that has to be in the JSON field, now we're making the format proprietary again. Right? Because now we're requiring your JSON object to adhere to a certain spec we designed, just to be able to do relationships. Tricky. For a second, I thought you were gonna write down "this will be tricky". Rest assured, this will be very difficult. Is there not a normal standard for that already? You'd hope so, but the answer is no. You know, there are some other CMS vendors that do this, but, again, they've got more, I think, generally well-defined, kind of hardened structures. The ability to be agnostic is where this becomes very difficult. Yeah. No, that's a great point, because they get to say: this is the data model you must use, therefore I can rely on the fact that my structures are gonna match. But I think, potentially, with some of the discussion we had on the schema piece as part of this entire discussion here: if you were able to say, I am predefining a schema, I know what that is, and then I wanna have enforcement, validation, and the understanding that that's there. The agnosticity is: you define the schema, or we define a schema, but it's generic in the sense that you're not required to have it. You can have a JSON field that's unstructured and doesn't have a schema. But if you choose to implement one, then you actually get the benefits of: now I can kind of, quote, guarantee, as much as you can guarantee anything in a JSON world, and I know what my data structure is, and I know how to interact with it. Yeah. To be frank, this is just very, very difficult. I mean, we could, you know, just willy-nilly keep track of some keys in a new table that says, hey, this relates to that, similar to relations like we do it right now, and, well, try to keep that in sync. So when you're talking about the other-vendor use case, right, I think for the majority there, it's basically built around the idea that you have a globally unique ID. Right? So it's a very document-database-first approach. We use Notion a lot, right? If you ever create a Notion page that links to another Notion page, that is just a globally unique ID. So you can nest it however you want, and it knows, because everything is a page at all times and every page has a globally unique ID. So that sort of relationship lookup kind of solves itself. Right? Because you don't need to know what collection or what table to look at to find the item; it's all globally unique. Right? The difficulty here lies in that hybrid model, where you want to nest a relational record inside of a document, which itself might be related from a relational row. Oh, do we like to make things difficult? But that's why it's cool. Let's see. There is a Portable Text spec. Absolutely. Which, again, I think it would be great to have some sort of Portable Text interface, or an extension that handles that specific format. But it would be a shame if everything that we're talking about only works if you're in a specific format. Right? Because who are we to say what the right format is? I agree. I just looked at the Portable Text one. Yeah. 
It looks very, very similar to how everybody else does it. You know? You have some things, you define your type, and then you have some keys depending on that type. Great. Cool. Alright. We're quickly reaching the top of the hour here. It was a good idea to only focus on one topic for these sessions. Just based on prior experience, it always turns into "this should be quick" and then you talk about it for an hour. Just to give the chat a chance: any other thoughts, questions, concerns, ideas, shout-outs? If you wanna say hi to the family at home, this is the time. Or when they start typing... everyone is typing. Config as code. Uh-oh. I'm pretty sure we have a separate one of these dedicated to that coming up. Ship it. Ship it? I'm not gonna say it right now. Let's go. Same. Yeah. The next one's gonna be Config as Code. Cool. Cool. Cool. So make sure to come back for that one. Saucy, saucy. Cool. Well, if there are no other questions, thoughts, feelings, or concerns, let's wrap this one up. Brian just said the conversation about config as code is in 2 weeks, not the release. I think this is a very, very, very important clarification. Thank you, Brian. Thank you, Brian. So it's important, immediately. Go now. What have we done? Cool. Well, all that being said, everybody: thank you so much for joining us here live, and thank you so much for watching this at home if you're seeing it on Directus TV. For the people live here, if you don't know what that is, check it out: directus.io/tv. I think I got that right. Right? It'll be up in about a week's time. But for now, I wanna say thank you all. Good luck and Godspeed. And we'll see you in 2 weeks.","published",[149,159,168],{"people_id":150},{"id":151,"first_name":152,"last_name":153,"avatar":154,"bio":155,"links":156},"23ebcf2c-4374-4f5c-8198-f8ad497fd856","Rijk","van Zanten","7ef9652f-3835-432c-a43a-c5fe13001f31","CTO of Directus",[157],{"url":135,"service":158},"website",{"people_id":160},{"id":161,"first_name":162,"last_name":163,"avatar":164,"bio":165,"links":166},"0d906492-75f0-45d9-abf7-ab779bf1ed08","Jonathan","Wagner","5062e4df-a198-4b40-af47-42362d3c0551","Sales Engineering Manager at Directus",[167],{"url":138,"service":158},{"people_id":169},{"id":170,"first_name":171,"last_name":172,"avatar":173,"bio":174,"links":175},"07ec688d-251d-4efe-bc17-73848402d43b","Daniel","Biegler","8897b70f-c524-460a-8990-58cc5c3be886","Engineer at Directus",[176],{"url":141,"service":158},[],{"id":179,"number":142,"year":180,"episodes":181,"show":194},"6aa046f1-bd53-4510-9af0-c0f3daaf4415","2024",[122,182,183,184,185,186,187,188,189,190,191,192,193],"86fa152b-6a8b-477e-94b5-bd91e1202d21","0b5f4343-1494-455b-b41a-25811c151242","b2b01569-d8c6-49a7-adaa-429fe84f204f","b63afbe1-6418-4e9e-b1da-4890979789f0","69ad81e8-5e1d-4b85-9fa9-3b767a3a3478","5c9c888c-f527-4608-a2f7-56f156d00980","243daa59-3772-4ebe-b212-c2a09a4a0b71","d66c1e46-cc57-49fe-a914-2e440bbc1576","12c8f72d-22fa-4ffa-a9d1-57047216fd1a","8896c934-aa2c-43b6-9342-8275682ab8b2","84c7b3ac-fd85-4539-8f39-3247118bcbf2","044b7c89-aaec-43b2-9d6d-6743a0fb5afd",{"title":195,"tile":196},"Request 
Review","73687d01-3734-4c28-aef7-e6fa8db4cf1e",{"id":182,"slug":198,"season":179,"vimeo_id":199,"description":200,"tile":201,"length":202,"resources":203,"people":206,"episode_number":210,"published":211,"title":212,"video_transcript_html":213,"video_transcript_text":214,"content":8,"seo":8,"status":147,"episode_people":215,"recommendations":219},"config-as-code","906788449","In this recording of our live event on January 25 2024, Rijk, Jonathan, and Daniel discuss configuration as code.","bd5024fb-4ef7-455a-8ff6-8631da26b5d2",56,[204],{"name":130,"url":205},"https://github.com/directus/directus/discussions/13041",[207,208,209],{"name":134,"url":135},{"name":137,"url":138},{"name":140,"url":141},2,"2024-02-01","Configuration as Code","\u003Cp>Speaker 0: Welcome everybody once more to a wonderful request review session here where we go over feature requests and figure it out. Now what do we do? I'm afraid we ramble on for about an hour about the technical complexities. Remember, the goal here is to basically divergently discuss, you know, what is the feature request, what are we trying to do, what is it trying to achieve, And how do we think we can make it happen in a very sort of direct to see way? What are we talking about this week?\u003C/p>\u003Cp>Speaker 1: Yes. We're talking about configuration.\u003C/p>\u003Cp>Speaker 0: Configuration as code. Let's let's figure out how to take schema endpoints to the max and actually Schema endpoints to the max. Manage the entire project as code. So this is really with a focus on GitOps. Right?\u003C/p>\u003Cp>Where you have a sort of centralized repository of static files that is the single source of truth for all configuration of the running project. Which as you might guess, they get complicated fairly quick. Hello. And as per usual, we'll be eyeing the chat. So if you have any questions in between or any suggestions or any good thoughts, please do please do put it in the chat.\u003C/p>\u003Cp>I already saw his name fly by. Well, most likely I have a very special guest for you today, because our very own Connor has been researching, you know, some of this for a little while now. But before we dive into the research results there, let's discuss a little bit of the requirements that are presented here in the current feature requests. Right? Because the one thing we know now, you know, the current state of affairs, we have that schema snapshot and apply endpoint, that we use and sort of recommend for, you know, moving bits of schema, to and from dev to prod, that sort of thing.\u003C/p>\u003Cp>But as people have pointed out, you know, that is still for schema only. Right? So we know one of the big requirements for this is gonna be you need to figure out additional configuration, additional additional, data points maybe from your own tables, you know, environment migrations, like you mentioned there, which includes, you know, what about roles? What about flows? What about presets?\u003C/p>\u003Cp>What about translation strings, etcetera? This well, one of the complexities for this is figuring out, you know, what is configuration within the context of directives in the first place. Right? Which is a discussion topic that I have had some trouble with just going through myself already, which is what is configuration? You know, are your roles and the way you configured permission configured, you know, permissions configuration, probably.\u003C/p>\u003Cp>But the users within those roles, probably not. But then users with static tokens, maybe. Right? 
If you have your own tables, maybe you have a single app-settings singleton collection that you use for configuration: is that now configuration that is part of code-first configuration? Right?\u003C/p>\u003Cp>Even though it's not a system table and you're not configuring Directus, you might still be configuring other things. Although, that's where the fun starts. So maybe we could scroll down a little bit, Jonathan, and take a quick peek through the motivation and the requirements here. So, as we kinda touched on already, this one here is from Erif van Oort, pretty sure that would be a Dutch user. It's about things like permission logic, keeping the local dev environment in sync, source control as the source of truth. Right? You wanna make sure that you can spin up new Directus instances not completely empty, but started from a template that is in your repo. And if there's an issue, you can easily share the configuration of your platform. Daniel, if you would kindly mute, you're being very annoying.\u003C/p>\u003Cp>From the replies in this discussion, this is immediately where it gets complicated. Right? It's like, what is configuration? Right? What is configuration?\u003C/p>\u003Cp>When it comes to import and export: how do you define what gets imported and what gets exported? Basically the same question, to me. Right? How does it get imported? Are you merging stuff?\u003C/p>\u003Cp>Are you overwriting stuff? What happens if you try to insert something that already exists? How do you deal with conflicts? Very good question.\u003C/p>\u003Cp>So if you wanna scroll down a little bit further to see what else is in here.\u003C/p>\u003Cp>Speaker 1: Yeah. No. I don't know about that point. That's a good question, but a very, very long one to answer properly. But the gist is: if you work with multiple people with different setups, and somebody changes your database schema, for example, how do you synchronize the state between your instance and another instance?\u003C/p>\u003Cp>You can do that with our schema endpoint; we already have that capability. But ideally, let's say ideally, you would want to set up your configuration as code, because then you have a single source of truth. If you're developing a new feature, for example, you need a new table, you need new fields, you want to test something, you wanna try something, but then you delete some fields: how do you get the changes synchronized between different setups? And the problem gets even larger if you have an organization, for example, with, let's say, one dev department of, like, 8 people.\u003C/p>\u003Cp>Stuff gets really gnarly really quick. How do you synchronize then between 8 people, between different branches, different features, different collections, different fields? You know.\u003C/p>\u003Cp>Speaker 0: Let alone a test team of 200. Right?\u003C/p>\u003Cp>Speaker 1: Yeah. Even for a very small team, it can get quite gnarly pretty quickly. But, you know... Yeah.\u003C/p>\u003Cp>Speaker 0: There's a couple other things there too. Right? When it comes to the git repo flow specifically: any change to the schema of the project is now version controlled, so you know what happened when, and you can roll back. 
And you have accountability, because you know who made the change, through that sort of git-first approach. Right?\u003C/p>\u003Cp>The other main thing there too, I think, is that from a database dump you don't have diffable files, which is one thing we'll touch on in a bit. And the second thing is that it's database vendor specific. That's another thing. Right? Like, you could plop the whole SQLite file in a repo, but if you have a local dev instance that uses SQLite and you wanna push your change into production on Postgres, now you have workflow trouble. Right?\u003C/p>\u003Cp>Even if you have local Postgres and the server is Postgres, you might go, I don't know, Postgres 10 to 13 or something. If there's a version mismatch, there are things to consider there. Of course, there are third-party tools, I see Ansible mentioned here, that you can use to sort of move databases across, that sort of thing. This would really be a Directus-native way to move configuration around.\u003C/p>\u003Cp>Right? Which I personally see as an improvement or an upgrade to the schema snapshot system that we have, rather than a completely new thing. The real question just becomes: how do we add more stuff into that so you can use it for this? That's really, to me, the underlying discussion. Right.\u003C/p>\u003Cp>Jonathan, if you wanna scroll down a little bit further, you can see if there are any other points. Wanna make sure we don't forget. Export considerations: multiple files. I think that's a very important requirement, because we've already seen some of the schema snapshots just get bonkers large. Right?\u003C/p>\u003Cp>Because if you have a thousand collections and a total of 2,500 fields, which sounds insane, but it happens in the wild, that one export file is megabytes and megabytes and megabytes worth of JSON. Tens if not hundreds, which gets unwieldy pretty quick. It also makes it more difficult to import, by the way, because we're not really able to stream it all that well, and then it becomes a very large file. So you have to read it into memory and then use it.\u003C/p>\u003Cp>Let's see. Selective export. That, I think, is a tricky one. Right? How do you know what you're exporting? If you consider your roles and permissions part of this, but you have one admin dev role that you don't care about for your production instance, how do you pick and choose?\u003C/p>\u003Cp>Right? Pick and choose what to include, what not to include. And handling sensitive data. Very good point. Is this gonna be plain text in a static file? Tricky.\u003C/p>\u003Cp>Right? Tricky in a repo. If you scroll down a little bit further: modular files, like extensions, a single file per collection, you know, which we kinda touched on.\u003C/p>\u003Cp>Speaker 2: Does it actually make more sense to have selective import versus export?\u003C/p>\u003Cp>Speaker 0: Great question. Great, great question. Maybe. Maybe.\u003C/p>\u003Cp>Yeah. It's like if you have... go ahead.\u003C/p>\u003Cp>Speaker 2: I'm sorry. Go ahead.\u003C/p>\u003Cp>Speaker 1: I don't have to remember to mute and unmute myself between every sentence. That's fine. Yeah. I can see both being very useful. 
Right?\u003C/p>\u003Cp>For example, if you have a very, very large instance with, like Greg mentioned, right, like a thousand collections, and on your dev instance you only want to add one thing: do you really need to export this whole thing that's, like, 10 megabytes or whatever? Maybe it would be enough to just export that table with its fields and you'd be good to go, because then you could import that partial instance, maybe. But yeah, for import or for export, both could be useful. But, like we said, lots and lots of stuff to talk about there. Yeah.\u003C/p>\u003Cp>Speaker 0: TBD is the honest answer. I also feel like both is probably where we need to end up with that. Because, to your point, if you have a large project and you only care about a small subset of it as a sort of templatable piece, you don't want to export everything and have a bunch of unneeded data in your repo muddying up the workflow and the reviews. Because then also imagine that you make an export and now you have a PR of, like, 16,000 lines of stuff that you don't really need. Right?\u003C/p>\u003Cp>But, yeah, let's see. Extending existing schema files: that's an interesting one. Merging multiple together, importing snippets from other files, maybe, for nested collections. So that's all about the file structure for the project.\u003C/p>\u003Cp>Saving only the non-defaults: that, to me, is more of a technical requirement. Right? It's like, we don't have to save default values from Directus in the schema snapshot, because they're the default values. Dynamic configuration sync: that's just, whenever you make a change in the studio, it auto-exports, basically, which feels heavy, personally. Feels a bit heavy, but could potentially work depending on the file format.\u003C/p>\u003Cp>But then again, how do you choose what to export on the automated one? Right? So TBD. That's also why it's a could-have. I mean, they've thought about it, luckily.\u003C/p>\u003Cp>Automatic real-time sync: sort of a similar idea. Right? But as an option in the Data Studio, API-triggered, or periodically on a schedule. So the one thing I do notice in the requirements list here is that there's a lot of talk about how to get it out of Directus and in what file format, but not so much the other way around. Right?\u003C/p>\u003Cp>How do you get it back in? So if you have something in your repo, whatever that something is, what does that code look like and how do you get that back into the Directus instance? Right? This might be a good point, actually, for a nice little segue. Like I hinted at at the beginning, our very own Connor has been doing quite a lot of homework on this, just to figure out the format and some of the ideas around this and how it could work.\u003C/p>\u003Cp>So let me see if I can find him. Where is he... so many people here. Look at that. Hello, Connor. What have you been up to recently?\u003C/p>\u003Cp>Speaker 2: I have been up to quite a bit involving this config as code and how it plays into all the other different parts of Directus that we wanna do. Let me get my notes up. Here we go. So you said you wanted me to talk about the structure of the exports?\u003C/p>\u003Cp>Speaker 0: I think it'd be cool if you wanna give a quick overview of sort of the research process itself. 
Like, what are the things that you've been looking into? What have been the considerations or requirements? And sort of the things that you found. And then dive into some initial conclusions.\u003C/p>\u003Cp>Speaker 2: Sure. So what I have been going through and researching is: basically, we have a couple of different feature requests, from config as code, to templates, to migrating between instances, to migrating between different databases. And all of it sort of involves moving configuration between instances, moving data between instances, and moving files and assets between instances. And that is a very big task when you're trying to be database agnostic and you're trying to be efficient. You're trying to support multiple different use cases, where sometimes you wanna overwrite everything, sometimes you just wanna bring in some stuff, sometimes you only wanna take out some stuff.\u003C/p>\u003Cp>Sometimes you wanna stream it all. Sometimes you wanna have the file small. So there's a lot of very different considerations that go into it, and then making it all happen in one sort of Directus way of making it all magical. It becomes a very big rabbit hole that you start diving down into. And so one of the things that I have been looking at is: what are all the different use cases for it, and what are the requirements for all those different use cases?\u003C/p>\u003Cp>And so configuration as code is one of the use cases on there. It's not completely fleshed out yet, because it's one of the later goals of what I've been working on. But with it comes: how do you integrate it with CI/CD, you know, GitHub, GitLab? Do you have your own self-hosted GitHub? Where is all your stuff stored? And so there's a whole bunch of different parts to it. Right now, with the schema service, you export a schema of your stuff. It exports everything. You diff it against your instance, and then you apply that diff, and it gives you the changes and whether you can apply them or not.\u003C/p>\u003Cp>And right now, that's really it. There's not really too much to the schema service outside of that right now, and adding in all these different layers and features, the schema service is definitely gonna have to take a new look. And so one of the things that we've been looking at is that initial export of a schema: making it more of a distributable type of folder structure, file structure, whether it's a compressed zip file or some type of other special file. But basically redefining how that schema export looks, to be able to hold all these different configuration items, to be able to hold data, to be able to hold assets, and defining that structure. And, as Rijk mentioned earlier, do you want that stuff stored in plain text, or do you want it to be stored in some type of encrypted format, or do you want it to be compressed?\u003C/p>\u003Cp>So there's a lot of different variables there. And then once you take that distributable that gets made... I mean, for some instances, if you've pulled out data and assets and configuration, that thing could be huge. And so we wanna bring that into the new instance, the target instance. Right? 
And so we need to diff it and change it.\u003C/p>\u003Cp>And so bringing all that in and processing it is a whole other thing. Do you wanna bring in all of it? You have all the export controls; do you wanna have import controls for how it gets applied and how it gets imported? And so I've been going through and documenting all those different ways that we can do stuff, and what is dependent on what: you know, if we wanna do this, then we have to do that.\u003C/p>\u003Cp>And so one of the things we've been looking at this week is what type of file format works for all of this type of stuff, for when it gets really big. If it holds a lot of data, if somebody has a thousand collections and 4,000 fields, is a CSV file or a JSON file really the right file structure to store all of that data? And so we've been looking at different options and different file formats for storing structured data like that in an efficient, compressed way that also keeps the schema of the schema export defined and structured. And then also making sure that we keep that same... right now, we hash the schemas and whatnot so that they stay consistent. You know?\u003C/p>\u003Cp>You could only use the schema to apply to this instance because you just diffed it against it, and yada yada. So having that in there too: do we have a metadata file inside of that export that talks about what the export is? Do these become an extension type that can be used throughout the instance in different places? There's a whole bunch of different options there.\u003C/p>\u003Cp>Speaker 0: Yeah. Yeah. Absolutely.\u003C/p>\u003Cp>Speaker 1: It was a great intro. Yeah. It was very good.\u003C/p>\u003Cp>Speaker 0: Exactly. Exactly. Yeah. So the first order of business, to your point: figuring out what that file format looks like. We know some of the requirements now, based on the discussion that we just looked at.\u003C/p>\u003Cp>We know some of the downsides of the current format. So that's a great step. Then, of course, the second big step will be figuring out how you go from that sort of source-of-truth overview into applying it for realsies. Right? So we have that diff step in between.\u003C/p>\u003Cp>So for those unaware: right now, if you upload a schema snapshot into the Directus API, it will compare it to the current state of the database and then return the list of differences, basically. So it's a diff, not a list of changes. As in, it's not a step-by-step list; it's just a diff, like an A versus B. And then that diff is uploaded to an apply endpoint, which will basically apply the changes required to get rid of the diff, right, to make sure that the two are in sync, that the instance is in sync with your file export. So based on that, Connor, we've done some research on what needs to happen on that diff endpoint itself too.\u003C/p>\u003Cp>You wanna share some insights on what we know now, at least, are some of the requirements to make that work properly with all of these new additional features that we're trying to add in?\u003C/p>
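For reference, a minimal sketch of that existing snapshot, diff, and apply flow against the schema endpoints, assuming admin tokens on both instances; error handling omitted:

```ts
// Minimal sketch of the current flow. Assumes admin tokens and two
// running instances; real usage needs error handling and version checks.
const headers = {
  Authorization: "Bearer <admin-token>",
  "Content-Type": "application/json",
};

// 1. Take a snapshot of the source instance's schema.
const snapshot = await (
  await fetch("https://source.example.com/schema/snapshot", { headers })
).json();

// 2. Ask the target instance to diff that snapshot against its own state.
//    The response is an A-versus-B diff, not a step-by-step migration.
const diff = await (
  await fetch("https://target.example.com/schema/diff", {
    method: "POST",
    headers,
    body: JSON.stringify(snapshot.data),
  })
).json();

// 3. Upload the diff to the apply endpoint, which reconciles the target.
await fetch("https://target.example.com/schema/apply", {
  method: "POST",
  headers,
  body: JSON.stringify(diff.data),
});
```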
\u003Cp>That's one of the research items on the list. Right now, we have an import and export service to import and export data, so we're looking at adding dry-run options: can you import this data, can you export this data, for that diffing stuff. If you have a really big file, say you do have 300,000 collections and fields, that's gonna take a long time to make changes to the database and to go through and find that diff. And so, having some type of long-running task runner on the instance that's able to sit there and work through making that diff, or that distributable, or whatever it is; some long-running service in the background of your instance that can handle that.\u003C/p>\u003Cp>And then also, if you're going through and applying all these big changes, or diffing it, or whatever, you don't want people in your instance changing things while you're trying to change things. So: implementing some sort of maintenance mode on your instance that basically locks it down and says, hey, we're making changes right now. It doesn't let anyone else change the schema, or the data, or whatever you want it to cover. We also have been looking, for asset data, at where you pull it in from: do you pull it in from the distributable file, or do you pull it in from the asset source? Do you pull it in from the S3 bucket directly, or do you package it into the distributable?\u003C/p>\u003Cp>But basically, for the diffing part, the other consideration is that if you have a really, really big distributable, or schema thing, whatever it ends up being called, then downloading it from one instance, uploading it to another, just to download another big thing and upload that big thing back again, is a lot of moving back and forth. And so the other idea is, when you upload that schema and it diffs it: instead of downloading the diff back to you and you having to send it back up, it could just keep the diff on the instance, and you just tell it to apply the stored diff that it already has. And then you don't have all that network traffic back and forth, and no problem when the Internet goes out halfway, you know?\u003C/p>\u003Cp>But that's one of the things that we've also been looking at for the diffing. And then another thing is different types of strategies for diffing and importing. Do you just wanna upsert stuff? Do you just wanna add new things and ignore everything else that has conflicts? Do you want it to only apply if there are no conflicts at all? Or do you want it to overwrite everything, so it doesn't matter if there's a conflict, we're gonna write over it with everything. And then, instead of just returning a singular diff that compares the two schemas and says, hey, this is what's different: putting in something more migration-like, making it more of a step type thing. So it works through migration steps: you need to do this, then this, then this.\u003C/p>
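\u003Cp>As a sketch of those apply options gathered in one place; the strategy names and option flags below are invented for illustration, not an existing Directus API:\u003C/p>\u003Cpre>\u003Ccode>// Hypothetical options for the proposed diff/apply endpoint.
type ImportStrategy =
  | 'upsert'       // insert new, update existing
  | 'insert-only'  // add new things, skip anything that conflicts
  | 'strict'       // apply only if there are no conflicts at all
  | 'overwrite';   // replace everything, conflicts included

interface ApplyOptions {
  strategy: ImportStrategy;
  dryRun: boolean;          // report what would change without changing it
  useStoredDiff: boolean;   // apply the diff kept on the instance, skip re-upload
  maintenanceMode: boolean; // lock schema and data changes while the apply runs
}\u003C/code>\u003C/pre>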
\u003Cp>And basically a workflow that the thing can work through, and that guides those long-running task runners on what to do and how to configure your instance.\u003C/p>\u003Cp>Speaker 0: And I think, last but not least, having some sort of format to expose potential conflicts for manual resolution. Right? Because one of the strategies has to be that it's up to the end user to pick and choose what to do. So imagine you go through a dev-to-prod life cycle, where you're not doing delete-everything-and-insert-everything (you wouldn't wanna do that in prod, obviously), and you have an upsert strategy, but there is a conflict: you have, like, a foreign key that doesn't resolve anymore, or something like that. There needs to be some sort of format, in whatever this diff or this migration step format looks like, that just has a list of: here are the steps with a known conflict, what do you wanna do? How do you wanna modify those steps to get around the conflict? Do you wanna upload new data, do you wanna ignore that particular step, or do you wanna ignore those records? So, to your point, we need some sort of dry run to check whether you can import all of the data; it's sort of a requirement in order to be able to surface potential conflicts. We need to have some way to search through the data you're trying to apply, in order to know how to deal with conflicts. Right?\u003C/p>
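\u003Cp>A rough shape for that conflict format, purely hypothetical; the step and resolution names are invented just to make the idea concrete:\u003C/p>\u003Cpre>\u003Ccode>// Hypothetical migration step with an attached, manually resolvable conflict.
interface MigrationStep {
  action: string;            // e.g. 'create-field', 'insert-items'
  target: string;            // e.g. 'articles.due_date'
  conflict?: {
    reason: string;          // e.g. 'foreign key no longer resolves'
    resolutions: string[];   // e.g. ['skip-step', 'skip-records', 'provide-data']
  };
}

// The apply endpoint could return the steps it cannot run and wait
// for the user to pick a resolution per step.
function unresolved(steps: MigrationStep[]): MigrationStep[] {
  return steps.filter((step) => step.conflict !== undefined);
}\u003C/code>\u003C/pre>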
So, basically, having some type of SDK that you can write and configure your instance with, and then you tell the CLI or whatever to execute that, read those different set the code that you've written, and then it will make a direct distributable file, diff file, whatever it is, from the code that you've read.\u003C/p>\u003Cp>So if you wanna go through and you wanna define all of your collections and your fields or whatever, and you can go in and define that in all your files and your code and then execute that code, it comes up. It generates that file that you can then use to apply those changes, import those changes, diff those changes to any of your target instances that you want to.\u003C/p>\u003Cp>Speaker 0: Jonathan, if you might be wanna pull up, I think one one piece of inspiration that we were looking at for that part specifically was AWS's, what do we call it? CDK, I think, code development kit. If you wanna quickly Google that, it could be it could prove like, it could put some flavor to that to that point. So the way AWS has that, they basically made a JavaScript library that you can use to code, like, configuration. And then what it does under the hood is it effectively converts it into, a CloudFormation template, I wanna say, and then applies it immediately.\u003C/p>\u003Cp>Right? So under the hoot, you don't really notice the difference, but it's effectively a 1, 2 jump. Right? So it converts it into their proprietary thing in the middle first and then just applies that as is. What am I searching for?\u003C/p>\u003Cp>Sorry. CDK, the cloud development kit. If you wanna pull up the GitHub repo for that, maybe I have a link somewhere. I'm just curious if they have some some examples somewhere. It's been a minute since I've played with this, but it's an interesting, idea.\u003C/p>\u003Cp>There was a Directus community library, a little while back that that tried doing a similar thing, but it would run it against the API endpoints. It wasn't as flexible yet because we didn't have we don't have, you know, GreenStep. Is this branch? You know. If you wanna do AWS before that, because I think CD case is all different.\u003C/p>\u003Cp>Yeah. There we go. There's a link in the chat as Open it up. Here we go. Here we go.\u003C/p>\u003Cp>Here we go. Is it gonna go? There it\u003C/p>\u003Cp>Speaker 1: goes.\u003C/p>\u003Cp>Speaker 0: So this is an interesting reference for people that wanna look it up. At home, it's basically, you know what was that? Distracted by the chat immediately. Using something like CDK would mean that changes would need to be replicated from the UI to the generation scripts. Changes would need to be replicated.\u003C/p>\u003Cp>That's a great point. Yeah. How does that go both ways? Right? Because if you have that one format in the middle, directors can recreate that format in the middle.\u003C/p>\u003Cp>The directors wouldn't be able to recreate arbitrary JavaScript, basically. Right? So when you opt into something like that, I think it becomes a one way street by definition because we cannot figure out what parts of your JS file are, or your code because they also have some other languages, but you get the idea. And we don't know what parts of that file are auto generatable and what parts are, human created. Right?\u003C/p>\u003Cp>There's so there's no way to auto generate that back into a manually created file. So at that point, that's a great point, but it's the it really becomes, you know, it becomes a a one way street at that point. 
\u003Cp>Someone in the Directus community did a sort of proof-of-concept library to do this, which is very interesting. If you wanna pull up the Directus community schema builder kit repo; I just sent a link in the chat there. It was very much inspired by a similar idea, where you have a JavaScript file that you use to sort of define it. It's almost a declaration file rather than JavaScript, but it is still just JavaScript that runs from top to bottom. But you could define your schema and how it's applied as individual build steps, in JavaScript. So this is where it gets real heavy on the code part of configuration as code, and not so much just the moving stuff around.\u003C/p>\u003Cp>So in terms of big-picture stuff, I really see this as the final step of whatever these changes are that we're discussing. We'll have to start with what that new format in the middle is, how it's generated, how it's used, and then see this as a way to sort of generate it into that format and then apply it automatically. But, yeah, that JavaScript syntax is an interesting idea. So, yeah, I see some folks typing. This is one of those very typical Directus projects where there are about 600,000 different opinions on the ideal way of doing this. And I think we saw it in the chat immediately. Shout out to the person that was like, isn't this just a database template? Why bother? Which I can totally get behind, but then there are 180 or so votes on the discussion. So apparently that is not an opinion that's shared.\u003C/p>
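\u003Cp>To give the build-steps idea some shape: a builder-style declaration could look roughly like the sketch below. The package name and the entire API are invented for illustration; this is not the actual community library's interface, just the flavor of it.\u003C/p>\u003Cpre>\u003Ccode>// Hypothetical builder-style schema declaration, loosely inspired by the
// community schema-builder idea. Every name here is made up.
import { defineSchema } from '@example/directus-schema-builder';

export default defineSchema((schema) => {
  const articles = schema.collection('articles');
  articles.field('title', 'string', { required: true });
  articles.field('due_date', 'timestamp');
  articles.relation('author', 'directus_users');
});

// A CLI could then compile this declaration into the intermediate
// distributable format and diff/apply it against a target instance,
// much like CDK compiles code down to a CloudFormation template.\u003C/code>\u003C/pre>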
\u003Cp>Speaker 1: Yeah. What's interesting is, technically, the most basic example would be something like a database migration, generally, in the beginning, for the configuration as code part. So there's another, similar, Directus-like CMS project that I've checked out to see how they did it. And they handled this a little bit differently. They don't have, like, a DSL-type language that defines your infrastructure or whatever. They went the route of: as soon as a person or user creates some type of change via the UI, in the tables, or the collections, or however you want to call it, the instance automatically generates a migration file locally for that specific change. And there's then a mode in the instance where you can disable any ability for other users to change the actual instance, so you can just rely on the migration files. Which is an approach that you could take, because a migration file could technically do anything you'd like with regards to the collections, the fields, whatever, even inserting items.\u003C/p>\u003Cp>But then, because we're Directus, that's a little too easy for us, because we would like to include some other things. Like: alright, how about, locally, you develop some type of new feature, a new table, a new collection, new fields. And then, in order for that to work the way that you want it to work, you need an item. You need to include a new data row, or an asset. And this is the thing now: assets are not inside of the database. So we want to include assets, for example; nothing is set in stone, of course, but, you know, including assets, for example. So you want to make some changes, and you need to include some assets for your changes to even be useful. So you would then have to do your changes, test it locally, include everything with the correct file name, with the correct row, or whatever other metadata of an asset, for example. But then on production, you would have to replicate that again. So you get back to step one. Right? So it kinda sucks with the migration approach. So even then, if we want to include this, we get back to the issue at hand that we were talking about: we want to have a process that can export something, and you can recreate that between instances, and so forth.\u003C/p>\u003Cp>I just wanted to mention that for the others in the chat, because it's not just about adding a field. That's a basic thing, which we could solve, and I think the Directus schema builder kit is basically that. Right? You generate some type of syntax, which generates some type of migration. But you have to keep in mind, then, of course, different database vendors; we have to abstract that. Because, for example, in SQLite, if I remember correctly (please correct me), you can't alter a table and introduce, like, a foreign key. You're forced to drop the table, actually, and recreate it in order to add a foreign key. Other databases can do that; SQLite can't. So there's lots of different...\u003C/p>\u003Cp>Speaker 0: To be honest there, I'm pretty sure that in the last minor release of SQLite, which didn't come out too long ago, they finally do have that alter table support sort of baked in. Although then, of course, you have the side effect that it depends on your native build of SQLite on your machine, which may or may not have it. So generally speaking, historically, you've been absolutely right. It's been a nightmare and a half to do that.\u003C/p>
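\u003Cp>The workaround Daniel describes, rebuilding the table to add a foreign key on SQLite, looks roughly like this in a Knex-style migration (Directus uses Knex under the hood). The table and column names are hypothetical, and this is a sketch of the general technique, not how Directus performs it internally:\u003C/p>\u003Cpre>\u003Ccode>import type { Knex } from 'knex';

// SQLite historically cannot add a foreign key to an existing table,
// so you create a replacement table, copy the rows, and swap names.
export async function up(knex: Knex) {
  // 1. Create the new table with the foreign key in place.
  await knex.schema.createTable('articles_new', (table) => {
    table.increments('id');
    table.string('title');
    table.integer('author').unsigned().references('id').inTable('users');
  });

  // 2. Copy the existing rows across (author existed before,
  //    just without the constraint).
  await knex.raw(
    'INSERT INTO articles_new (id, title, author) SELECT id, title, author FROM articles'
  );

  // 3. Drop the old table and rename the replacement into place.
  await knex.schema.dropTable('articles');
  await knex.schema.renameTable('articles_new', 'articles');
}\u003C/code>\u003C/pre>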
\u003Cp>Speaker 1: Lots of fun. So I just wanted to make sure that people in chat know it's not about just adding a thing; it's a little more involved than that. And, you know, including assets and all the rest. Right? Maybe there's proprietary information or whatever, and you're not allowed to leave it on your hard drive. Maybe you want to zip it, encrypt it, compress it. There are lots and lots of different steps that we have to kick off there. So, alright. Oh, we got some chat interaction. Cool.\u003C/p>\u003Cp>Speaker 0: Alright, top to bottom. First question: what speaks against using JSON or YAML files instead of JavaScript? This way the changes in the web app could also be synced back to the files easily. So, for what it's worth, the formats that we're talking about being generated from Directus would most likely be in some sort of structured format. Not quite sure if that's JSON or YAML yet, or if we have to find some sort of optimized file format for it. Because the risk with JSON and YAML exports, once you start including data, is that we no longer know how much data you wanna include. Like, if we're treating this as something you could use for backup and restore, we could be talking about a very large amount of data, at which point we need a very optimized structured format, and JSON or YAML may or may not be usable at that point. Right?\u003C/p>\u003Cp>Connor, remind me, we found an Apache file format that could be interesting for this. What was it called? Parquet or something. Right?\u003C/p>\u003Cp>Speaker 2: It was called Parquet.\u003C/p>\u003Cp>Speaker 0: Parquet. Yeah. That would be an interesting file format for something like that. Or potentially using a SQLite database as the exported file; that's a completely different direction. But you get the idea: we need some sort of optimized, compressed file format, because the export file could get really large. Now, it might be an option, in the way you save one of those, to just save it in a sort of raw mode, where it doesn't save it compressed, at which point it could be human-readable YAML or JSON, including the ability to properly source control it.\u003C/p>\u003Cp>On the migration note: I think you answered that before, Daniel, exactly right. If you're doing auto-generated migrations, it's really only for the database schema part. We can't really know on your behalf whether you consider Insights dashboards part of configuration, or flows, or something. So it's gonna be tricky, because different people have different export requirements. And if you go from dev to prod, all bets are off; you never quite know what the idea is. Then: creating an internal library for the schema that works with native access to Directus rather than the API, using integrations, helps with a lot of complex, repetitive actions. I can imagine, yeah, because you can write a little JavaScript for loop and, boom, you have 10,000 collections. But the lack of two-way integration with the UI does cause issues, which is the unfortunate side effect of using a programming language rather than a declarative format like YAML or JSON for doing schema modifications like that: you're gonna lose that two-way integration. That being said, if Directus has a don't-allow-schema-changes environment variable, flag, whatever, you could do that on purpose. For a production instance, for example, I can totally imagine that you disable any sort of schema modifications, just for security and availability reasons, and only allow those changes to happen through whatever system we're cooking up here. Right?\u003C/p>\u003Cp>I think default value filtering could help make the YAML output more manageable. Fully agree: we should only store the stuff that we need to know, and storing default values feels like a waste of space. Then: Azure is working on some sort of YAML-based metadata authoring, announcing... what marketing email? Very curious. I haven't heard of that one before. If you wanna keep the GitOps thing, it should really be a text format; good point from Tim. Which may or may not be answered by Dominic here: what if you split up between schema and content in different formats? Maybe the configuration piece is all human-readable file formats. 
But if you have a data export, maybe there's a file size threshold. Right? If you have a very large CSV export, maybe there's just a smart point where it's like: oh, you're trying to save 10,000 rows, we're gonna flip it automatically into a compressed, non-readable format, so you get the best of both worlds. Potentially. Cool. Alright.\u003C/p>\u003Cp>Speaker 1: It's something similar, yeah. Just to chime in with regards to what you just said: yeah, we have to split that up, preferably, or at least require this to be text-based so you can use it in version control, whatever. And for including items, for example, a ZIP is a nice thing that you could use. But then, of course, maybe this includes items from, like, an old version, and you want to insert something where a field doesn't exist anymore, and lots and lots of other stuff. And then, of course, if we have all of the different pieces that we want to persist, like flows, permissions, the general config as it is, you could include those in the export along with the items, so you can do both at the same time, or see if it differs and then cancel the thing. But, yeah, it's a fun thing. There are lots and lots of things that could go wrong. So many.\u003C/p>\u003Cp>Speaker 0: No matter what, we wanna make sure that the output file is a single distributable. On the one hand, we're saying we have to split it up into multiple files in order to make it efficient and easy to work with; but at the same time, we also wanna make sure that you have a singular thing, a singular file, that you can send over to somebody else. Either through the API, so you just have a single download or a single upload, or as a file, maybe packaged through, you know, the marketplace. Shout out. Wink wink, nudge nudge. Or to just email it to somebody for, like, an issue: put it in a GitHub issue as a zip. So there needs to be some sort of both. But I could also imagine that the API lets you download it as just a zip that you can double-click to open if you're on macOS, or do whatever else it takes on other platforms to unzip it. Looking at you, Daniel; I'm sure there's a two-step process. For those out of the loop: he's the this-is-the-year-of-Linux-on-the-desktop evangelist within the team.\u003C/p>\u003Cp>Speaker 1: Yes. This year. Here it is. This is the year of the Linux desktop. Mark my words.\u003C/p>\u003Cp>Speaker 0: But long story short, we're in that weird in-between where we need both. We need both the single file and multiple files. So we'll most likely have to come out with some sort of zip, gzip, something like that, in between. Cool.\u003C/p>
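\u003Cp>The size-threshold idea from a moment ago, sketched out; the threshold value and format names are invented for illustration:\u003C/p>\u003Cpre>\u003Ccode>// Hypothetical export-format selection based on a row-count threshold:
// human-readable text for small exports, so they diff nicely in version
// control, and a compressed columnar format once the data gets large.
const ROW_THRESHOLD = 10_000;

type ExportFormat = 'yaml' | 'parquet';

function pickFormat(rowCount: number): ExportFormat {
  return rowCount > ROW_THRESHOLD ? 'parquet' : 'yaml';
}\u003C/code>\u003C/pre>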
\u003Cp>Prisma migrations are an interesting way of doing things: they have a custom format, which is more concise than Directus YAML, and then some CLI tools that create actual SQL migrations and sync the environment. Yeah, great example. They basically do that with a shadow database, if I'm not mistaken; that's how they keep track of those migrations, step by step. And then, with the CLI tool, it can compare your custom migration format with what they've already tracked so far, and then apply the diff based on that. It still has a similar one-way issue, though, I think, because the Prisma migrations that you write manually, I don't think they can update those from the other side, so to speak, as a two-way binding. Good point, though. Alright.\u003C/p>\u003Cp>Speaker 1: I had a problem with it in the past. Sadly, I can't remember what it was right now, but it was very painful. So it's not a 100% perfect thing. Nice. I've seen people... you know, there are lots of others. What is it called, Drizzle, right now?\u003C/p>\u003Cp>Speaker 0: Oh, there's a couple of ORMs like that. Yeah.\u003C/p>\u003Cp>Speaker 1: So there are lots of sources that we could, you know, yoink some code from. Be inspired by. Let's say be inspired.\u003C/p>\u003Cp>Speaker 0: Yeah. I don't think we've legitimately ever yoinked code before.\u003C/p>\u003Cp>Speaker 1: Borrow. Borrow. See.\u003C/p>\u003Cp>Speaker 0: The strategies are interesting there. But the main difference, and you touched on it perfectly before, is just keeping track of the database schema versus having the ability to manually template in between: moving stuff between dev and prod that is not just schema but also data, and how to deal with that, which at any point needs to be something you can do manually. It doesn't have to be manual all the time, but it needs to be something you can do manually. Right?\u003C/p>\u003Cp>Cool. Well, with all that being said, and looking at the clock here: Connor, back to you. We've discussed quite a lot of the research up until this point, some of the requirements. You and I have also been daydreaming about potential ways to implement this moving forward. You wanna quickly touch on the different phases and the different parts that we wanna tackle as part of this bigger effort?\u003C/p>\u003Cp>Speaker 2: Yeah. So the first thing we wanna do is figure out how this is gonna look: the distributable, the diffing. What is that file structure? What's the file format? What are the requirements for it? Do we have encryption? Do we have multiple files? Do we have one file? Like we've already discussed here: the first thing is defining what that'll look like in totality, covering all use cases, so that as we progress through the phases of this project, we can keep that spec in mind and make sure we cover everything as we work through the different phases. So that is phase zero: defining that spec, figuring out what it looks like. Phase one would be upgrading the schema service foundationally. So making sure we have all the bug fixes with the schema service taken care of, adding the different strategies to it that might be needed. 
You know?\u003C/p>\u003Cp>Adding in those export filters: only export this stuff, not that stuff. So getting that schema service up to speed, adding some more features to it. And then the next phase is working on data importing: adding in the different features that would be needed to make data importing work. Dry-running imported data, importing strategies. Right now everything just gets upserted; do we wanna have an import strategy where, if you import the data, it drops all the current data in the table and re-imports everything fresh? Adding those different options. Some other ideas that have been thrown in the mix: if you're moving from one instance to another and you don't wanna bring any of your IDs, your primary keys, your foreign keys, adding some type of way to anonymize those as they go from one instance to another, so the new instance makes new IDs for everything. Or adding the ability, for, like, templates, to have dynamic data that gets brought in on import. If I wanna bring in a template for project management that has a due date in one of the items, but I made the template a year ago, then the due date's gonna be from a year ago. So being able to say: I want you to import this as a date, but set it to two hours after the import.\u003C/p>\u003Cp>And then the next phase after that, after working on data importing, is putting it all together. So we've worked on the spec for the thing, we've worked on the schema service, we've worked on data importing; now we need to bring it all together into this new, overhauled, multi-modal configuration thing, bring all those pieces together and make it work, basically. And then after that, once we've gotten the spec spec'd out, the schema service upgraded, the data importing upgraded, and we've brought it all together and it works foundationally, we need to figure out how we use this to implement the different use cases. So for templating: what do we build inside of the Data Studio admin app, from a user interface and API perspective, to make templating work? What do we do for configuration as code, using the stuff that we've already built; how do we implement the feature set that we want in the configuration-as-code use case? Are there any other use cases, like backing up and restoring an instance; do we have the feature set we want for that? And then that should, in theory, wrap up the project, after we figure out and implement those use cases. So there are quite a few steps here, quite a lot of different items, and it's going to take a long time. But at the end, we should have something pretty cool.\u003C/p>
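\u003Cp>The dynamic due-date idea could work with import-time placeholders, something along these lines; the placeholder syntax here is invented purely for illustration:\u003C/p>\u003Cpre>\u003Ccode>// Hypothetical resolution of dynamic placeholders on import. A template
// item could store '{{ now+2h }}' instead of a literal timestamp.
function resolveDynamicValue(value: string, importedAt: Date): string {
  const match = value.match(/^\{\{\s*now\+(\d+)h\s*\}\}$/);
  if (!match) return value; // static value, import as-is

  // Shift the import timestamp by the requested number of hours.
  const shifted = new Date(importedAt.getTime() + Number(match[1]) * 3_600_000);
  return shifted.toISOString();
}

// resolveDynamicValue('{{ now+2h }}', new Date()) yields a due date
// two hours after the import ran.\u003C/code>\u003C/pre>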
\u003Cp>Speaker 1: Ideally. Yeah.\u003C/p>\u003Cp>Speaker 0: And I think Daniel's facial expressions told the whole story. No, but I mean, it makes sense to me. It sounds like a large, overwhelming amount of stuff, as it usually does, but by breaking it up step by step like this, we actually have a pretty solid idea, start to finish, of what's involved in getting that across the finish line. And, yeah, as per usual, there's a lot of, what's the word, Directus magic going on under the hood to tie it all together. But I'm always just very excited and glad that we're able to sort of re-envision this as one underlying, core, foundational engine that can power all of those different use cases, rather than trying to tack on a new templating piece, and a new code-first configuration piece, and a new other piece. That just increases the tech debt, makes it hard to maintain, and makes those different flavors incompatible. Somebody will ask, how do we do a template as code, and we're like, well, you can't, because code is not for templating. That kind of stuff. It really reminds me of how we built flows: we had the hook extension first, and if you configure a new flow, you're effectively just building together a hook extension. It's all the exact same underlying logic and event-based system, which is also why in flows you can do the same stuff as you can in a hook. Well, same events; in a hook, you can of course code it yourself. So I'm very glad that we're making this a foundational upgrade to the schema snapshotting engine rather than trying to make yet another new thing. Cool. Well, that all being said, I see we're at time here. Let me just quickly peek at the chat: did we miss anything? Pascal mentioned dynamic collection and field names would be cool, e.g. importing third-party templates so you can choose your own target names. Good point; I think something similar goes for the conflict resolution piece that we talked about, if you're trying to import. Won't be complex? Very, very true. But rest assured, it will be complex, I'm sure. Cool.\u003C/p>\u003Cp>With all that being said, this will be live on directus.io/tv/requestreview in about a week's time. The last episode you can find there too. If you're watching this on Directus TV, it's probably somewhere over there, there, or down there. Is this the point where we say like and subscribe? No, we don't have that yet. Exactly. We thank you all for joining.\u003C/p>","Welcome everybody once more to a wonderful request review session, where we go over feature requests and figure out: now what do we do? And I'm afraid we ramble on for about an hour about the technical complexities. Remember, the goal here is to basically divergently discuss what the feature request is, what we're trying to do, what it's trying to achieve, and how we think we can make it happen in a very sort of Directus-y way. What are we talking about this week? Yes, we're talking about configuration. Configuration as code. Let's figure out how to take the schema endpoints to the max and actually manage the entire project as code. So this is really with a focus on GitOps, where you have a sort of centralized repository of static files that is the single source of truth for all configuration of the running project. Which, as you might guess, gets complicated fairly quick. Hello. And as per usual, we'll be eyeing the chat, so if you have any questions in between, or any suggestions, or any good thoughts, please do put them in the chat. I already saw his name fly by. 
Well, most likely. I have a very special guest for you today, because our very own Connor has been researching some of this for a little while now. But before we dive into the research results, let's discuss a little bit of the requirements that are presented here in the current feature requests. Because the one thing we know now, the current state of affairs: we have the schema snapshot and apply endpoints, which we use and sort of recommend for moving bits of schema from dev to prod, that sort of thing. But as people have pointed out, that is still for schema only. So we know one of the big requirements for this is gonna be: you need to figure out additional configuration, additional data points, maybe from your own tables, environment migrations, like mentioned there. Which includes: what about roles? What about flows? What about presets? What about translation strings, etcetera? Well, one of the complexities for this is figuring out what configuration even is within the context of Directus in the first place. Which is a discussion topic that I have had some trouble with just going through it myself already: what is configuration? Are your roles, and the way you configured permissions, configuration? Probably. But the users within those roles? Probably not. But then users with static tokens? Maybe. If you have your own tables, maybe you have a single app-settings singleton collection that you use for configuration: is that now configuration that is part of code-first configuration? Even though it's not a system table and you're not configuring Directus, you might still be configuring other things. And that's where the fun starts. So maybe we could scroll down a little bit, Jonathan, and take a quick peek through the motivation and the requirements here. So, as we kinda touched on already, the same here from Erif van Oort, pretty sure that would be a Dutch user. It's about things like permission logic, keeping the local dev environment in sync, source control as the source of truth. You wanna make sure that you can spin up new Directus instances not completely empty, but started from a template that is in your repo. If there's an issue, you can easily share the configuration of your platform. Daniel, if you would kindly mute, you're being very annoying. Going from the replies, this is immediately where it gets complicated. Right? What is configuration? When it comes to import and export, how do you define what gets imported and what gets exported? Basically the same question, to me: how does it get imported? Are you merging stuff? Are you overwriting stuff? What happens if you try to insert something that already exists? How do you deal with conflicts? Very good question. So if you wanna scroll down a little bit further to see what else is in here. Yeah, no, I don't know about that point. That's a good question, but a very, very long one to answer properly. But the gist is: if you work with multiple people with different setups, and somebody changes your database schema, for example, how do you synchronize the state between your instance and another instance? You can do that with our schema endpoints; we already have that capability. 
But, technically, or ideally, let's say, you would want to set up your configuration as code, because then you have a single source of truth. If you're developing a new feature, for example, you need a new table, you need new fields, you want to test something, try something, but then you delete some fields: how do you get the changes synchronized between different setups? And the problem gets even larger if you have an organization with, let's say, I don't know, one dev department of, like, 8 people. Stuff gets really gnarly really quick. How do you synchronize between 8 people, between different branches, different features, different collections, different fields? Let alone a test team of 200. Right? Yeah. And this is, you know, even for a very small team, it can get quite gnarly pretty quickly. But, yeah, there are a couple of other things there too. When it comes to the git repo flow specifically: any change to the schema of the project is now version controlled, so you know what happened when, and you can roll back. And you have accountability, because you know who made the change, through that git-first approach. Right? The other main thing there too, I think, is that with a database template you don't have files, which is one thing we'll touch on in a second, and the second thing is that it's database vendor specific. Like, you could plop the whole SQLite file in a repo, but if you have a local dev instance that uses SQLite and you wanna push your change into production on Postgres, now you have workflow trouble. Even if you have local Postgres and server Postgres, you might go from, I don't know, Postgres 10 to 13 or something; if there's a version mismatch, there are things to consider there. Of course, there are third-party tools, I see Ansible mentioned here, that you can use to sort of move databases across, that sort of thing. This would really be a sort of Directus-native way to move configuration around, which I personally see as an improvement or an upgrade to the schema snapshot system that we have, rather than a completely new thing. The real question just becomes: how do we add more stuff into that so you can use it for this? That's really, to me, the underlying discussion. Right. Jonathan, if you wanna scroll down a little bit further, we can see if there are any other points; wanna make sure we don't forget anything. Export considerations: multiple files. I think that's a very important requirement, because we've already seen some of the schema snapshots just get bonkers large. Because if you have a thousand collections, a total of 2,500 fields, it sounds insane, but it happens in the wild, and the one export file is megabytes and megabytes worth of JSON. Tens if not hundreds, which gets unwieldy pretty quick. It also makes it more difficult to import, by the way, because we're not really able to stream it all that well once it becomes a very large file, so you have to read it into memory and then use it. Let's see. Selective export. That, I think, is a tricky one. How do you know what you're exporting if you consider your roles and permissions part of this, but you have one admin dev role that you don't care about for your production instance? How do you pick and choose what to include, what not to include? And handling sensitive data, very good point. 
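To make the pick-and-choose idea concrete, a selective export filter could be a small options shape along these lines; every field name below is invented for illustration, not an existing Directus option:

// Hypothetical selective-export filter.
interface ExportFilter {
  collections: string[];   // only these collections (schema, optionally data)
  roles: string[];         // roles to include; empty to skip roles entirely
  includeFlows: boolean;
  includePresets: boolean;
  redactSecrets: boolean;  // strip static tokens and other sensitive values
}
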
Is this gonna be plain text in a static file? Tricky, right? Tricky in a repo. If you scroll down a little bit further: the modular files of extensions, single file per collection; you know, we kinda touched on that. Does it actually make more sense to have selective import versus export? Great question. Maybe. Yeah. It's like, if you have... go. I'm sorry, go ahead. I have to remember to mute and unmute myself between every sentence. That's fine. Yeah, I can see both being very useful. For example, if you have a very, very large instance with, like Greg mentioned, a thousand collections, and on your dev instance you only want to add one thing, do you really need to export this whole thing that's, like, I don't know, 10 megabytes or whatever? Maybe it would be enough to just export that table with its fields, and you'd be good to go, because then you could import that partial instance, maybe. But yeah, for import or for export, both could be useful. But, like we said, there's just lots and lots of stuff to talk about there. Yeah, TBD is the honest answer. I also feel like both is probably where we need to end up with that. Because, to your point, if you have a large project and you only care about a small subset of it as a sort of templatable piece, you don't want to export everything and have a bunch of unneeded data in your repo muddying up the workflow and the reviews. Because then also imagine that you make an export and now you have a PR of, like, 16,000 lines of stuff that you don't really need. Right? But, yeah, let's see. Extending existing schema files, that's an interesting one. Merging multiple together, importing snippets from other files, maybe, from nested collections. So that's all about the file structure for the project. Saving the non-defaults, I think that is more of a technical requirement to me: we don't have to save default values from Directus in the schema snapshot, because they're the default values. Dynamic configuration sync, which is just: whenever you make a change in the studio, it auto-exports, basically. Feels a bit heavy, personally, but could potentially work depending on the file format. But then again, how do you choose what to export on the automated one? So, TBD. That's also why it's a could-have. I mean, they've thought about it, luckily. Automatic real-time sync, sort of a similar idea, but as an option in the Data Studio: API triggered, or periodically. So the one thing I do notice in the requirements list here is that there's a lot of talk about how to get it out of Directus, and in what file format, but not so much the other way around. How do you get it back in? If you have something in your repo, whatever that something is, what does that code look like, and how do you get that back into the Directus instance? This might be a good point, actually, a nice little segue. Like I hinted at the beginning, our very own Connor has been doing quite a lot of homework on this, to figure out the format and some of the ideas around this and how it could work. So let me see if I can find him. Where is he... so many here. Look at that. Hello, Connor. 
What have you been up to recently? I have been up to quite a bit involving this config as code and how it plays into all the other different parts of Directus that we wanna do. Let me get my notes up. Here we go. So, you said you wanted me to talk about the structure of the exports? I think it'd be cool if you wanna give a quick overview of sort of the research process itself.
And then once you take that distributable that gets made, it can be I mean, for some instances, if you've pulled out data and assets and configuration, you know, that thing could be huge. And so we wanna go we wanna bring that into the this new instance of the target instance. Right? And so we need to different and change it. And so bringing all that in and processing in is a whole another thing. You know, do you wanna bring in all of it? You know, you have all the export controls. Do you wanna have import controls to how it gets implied and, you know, how it gets imported? And so I've been going through and documenting all those different ways that we can do stuff, you know, what is dependent, you know, if we wanna do this, then, you know, we have to do that, you know. And so we've been looking at one of the things this week, you know, is what type of file format for all of this type of stuff, for how it gets really big. You know, if it gets a lot of data, if somebody has a 1,000 collections and 4,000 fields, you know, is a CSV file, a JSON file really the right file structure to store all of that data? And so we've been looking at, you know, different options and different file formats for storing, you know, structured data like that in an efficient compressed way that also lets you keep the schema of the schema export defined and structured in a way. And then also making sure that we keep that same right now, we hash the schemas and stuff so that they all stay. You know? You could only use the schema to apply to this instance because you just did it with it and yada yada. So having that in there too, you know, do we have a metadata file inside of that export that, you know, talks about what the export is? You know, do we have it? Do these become an extension type, you know, that can be used throughout the instance in different places? You know, there's a whole bunch of different options there. Yeah. Yeah. Absolutely. It was a great intro. Yeah. It was a very good yeah. Exactly. Exactly. Yeah. So the first order of business to your point, you know, figuring out what does that file format look like. We know some of the requirements now based on this discussion that we just looked at. We know some of the the downsides of the current format. So that's a great step. Then, of course, the second big step will be figuring out, you know, how do you go from that sort of source of truth overview into applying it for realsies. Right? So we have that sort of diff step in between. So for those unaware right now, if you upload a schema snapshot into the Directus API, it will compare it to the current state of the database and then return, you you know, the the the list of differences, basically. So it's a diff, not a list of changes. As in a step step by step list, it's just a diff, like an a a versus b. And then that diff is then uploaded to an apply endpoint, which will basically, you know, apply the changes required to get rid of the diff, right, to make sure that the that the 2 are in sync, that the instance is in sync with your file export. So based on that, Connor, we've done some research on what needs to happen on that diff endpoint itself too. You wanna you wanna share some insights on what we know now, at least, are some of the requirements to make that work properly with all of these new new additional features that we're trying to add in. Yeah. So with that dipping endpoint, some of the things that we are looking at is, number 1, if you wanna bring in data. Right? 
You know, how do you diff large amounts of data? Are you able to diff large amounts of data? That's one of the research things that's on the list. You know? Right now, we have an import and export service to import and export data. Looking at Attica drive run options. You know? Can you import this data? Can you export this data, for that diffing stuff? You know, if you have a really big file, so you do have 300,000 collections and fields, you know, that's gonna take a long time to make changes to the database and to go through and find that diff. And so having some type of long task runner on the instance that's able to sit there and work through that to that, diff or making that diff or distributable or whatever it is. You know, having such a long running service of the background of your instance, I can handle that. And then also if you're going through and you're applying all these big changes or diffing it or whatever, you don't want people in your instance changing the stuff as you're trying to change stuff. So implementing some sort of maintenance mode on your instance that basically locks it down and puts it, hey. We're making changes right now. You know, you can't it doesn't let anyone else change the schema or anything or the data or whatever you want it to do. We also, have been looking at, you know, for asset data, you know, pulling in you know, do you pull it in from the distributable file, or do you pull it in from the data the asset source? You know, do you pull it in from the s three bucket directly? You know, do you use it like that, or do you package it into the distributable? Or, but, basically, for the diffing part, the other part is that if you have a really, really big distributable or schema thing, whatever it ends up being called, you also upload downloading it from one instance, uploading it to another instance just to download another big thing, just to upload the other big thing back again is a lot of moving back and forth of all this different stuff. And so the other thing is when you upload that schema, whatever, it diffs it. Instead of it downloading the diff back to you, then you having to send it back up, it being able to just keep the diff on the instance and you just being able to tell it to apply the storage diff that it already has. And so you don't have to have that all that network changing back and forth, and then, you know, Internet goes out, then you're screwed. You know? But that's one of the things that we've also been looking at for the diffing. And then another thing is, you know, different types of strategies of diffing and importing. So that, you know, do you just wanna up cert stuff? Do you just wanna add new things and you wanna ignore everything else that has conflicts? Or do you want it to only apply if there are no conflicts, you know, or do you want it to overwrite everything, you know, so it doesn't matter if there's conflict. We're gonna rewrite over it with everything, you know. And then instead of just returning a singular diff that just compares the 2 different schemas and it just says, hey this is what's different. You know putting in more migration like making it more of a step type thing. So it works through migration steps. And, you know, oh, you need to do this. You need to do this. You need to do this. You need to do this. And basically a workflow that the thing can work through and and guide those long running task runners on what to do and how to configure your instance. 
And I think last but not least, the having some sort of format to expose potential conflicts for manual resolution. Right? So if one of the strategies has to be that it's up to the end user to pick and choose what to do. So imagine if you go, you know, from a dev to prod, life cycle, right, where you're not so much delete everything and insert everything, you wouldn't wanna do that in a prod obviously, And if you have an absurd strategy, but there is a conflict, right, you you have, like, a foreign key that doesn't work anymore or something like that. There needs to be some sort of format in whatever this diff looks like or this migration step format looks like that just has, a list of Here's the steps with the known conflict What do you wanna do? Right? How do you wanna modify that that step, those steps to get around the conflict? Right? Do you wanna upload, you wanna upload new data, or do you wanna, ignore that particular step, or do you wanna ignore those records? You know? So to your point, if we need some sort of driver and to check if you can import all of the data, it's sort of a requirement in order to be able to extract, you know, potential conflicts. So we need to have some sort of way to search through the data you're trying to apply, in order to know how to deal with conflicts. Right? So now that we're talking about all of this, what we're what we started to notice is that we're not so much talking about, you know, configuration as code specifically or templating specifically. What we're basically shaping here is a system that works for multiple things, right? Depending on how you use it. So if you were to make, a snapshot of everything, just full stop everything, and you import it as as apply everything, what you're talking about now is basically backup restore. Right? If you're exporting a small fragment and you're importing that into another project, you're basically talking about templating. Right? If you're exporting just the schema part and no data and you apply that to a new project, you're you're talking about seeding or something. You know what I mean? Like preparing preparing, a database basically, a a new project for what you wanted to do. And the question now is, how does that how does that all tie together how does that tie back into the configuration as code parts specifically, Connor? Because what we're talking about now is, you know, a new sort of format, generated by Directus that you can save somewhere, you know, which is fairly still, it's still fairly proprietary because it will have to be heavily compressed and, you know, directors needs to know what the format is. So what is the current thinking on tying it back into the code side of this question? Right? Yeah. So if we went the route of having some sort of distributable file structure folder structure that is some proprietary format or is encrypted or compressed or whatever, you know, you're not gonna be able to sit there and write code that is a compressed file. You know, you're gonna have to have write something that generates that file. So one thought that we've been having is following the lead of some other types of, you know, companies like AWS and their SDKs. So, basically, having some type of SDK that you can write and configure your instance with, and then you tell the CLI or whatever to execute that, read those different set the code that you've written, and then it will make a direct distributable file, diff file, whatever it is, from the code that you've read. 
And the question now is: how does that all tie together? How does it tie back into the configuration-as-code part specifically, Connor? Because what we're talking about now is a new sort of format, generated by Directus, that you can save somewhere, and it's still fairly proprietary, because it will have to be heavily compressed and Directus needs to know what the format is. So what's the current thinking on tying this back into the code side of the question?

Yeah. If we go the route of a distributable file or folder structure that's some proprietary format, encrypted or compressed or whatever, you're not going to be able to sit there and hand-write a compressed file. You're going to have to write something that generates that file. So one thought we've been having is following the lead of companies like AWS and their SDKs: basically having some type of SDK that you write and configure your instance with, and then you tell the CLI or whatever to execute the code you've written, and it produces that distributable file, diff file, whatever it is, from your code. So if you want to define all of your collections and fields, you define them in your code, execute that code, and it generates the file you can then use to apply, import, or diff those changes against any target instance you want.

Jonathan, maybe you want to pull up one piece of inspiration we were looking at for this part specifically: AWS's CDK, the Cloud Development Kit. If you want to quickly Google that, it could put some flavor to this point. The way AWS does it, they made a JavaScript library that you can use to write configuration as code, and under the hood it effectively converts that into a CloudFormation template, I want to say, and then applies it immediately. So under the hood you don't really notice the difference, but it's effectively a one-two jump: it converts your code into their proprietary intermediate format first, and then applies that as-is. What am I searching for? Sorry: CDK, the Cloud Development Kit. Pull up the GitHub repo for that; I'm just curious whether they have some examples somewhere. It's been a minute since I've played with it, but it's an interesting idea. There was a Directus community library a little while back that tried doing a similar thing, but it ran against the API endpoints and wasn't as flexible yet, given what was available at the time. There's a link in the chat now. There it goes. So this is an interesting reference for people who want to look it up at home.

And, distracted by the chat immediately: "Using something like CDK would mean that changes would need to be replicated from the UI to the generation scripts." That's a great point. How does that go both ways? Because if you have that one format in the middle, Directus can recreate that format in the middle, but Directus wouldn't be able to recreate arbitrary JavaScript. So when you opt into something like that, I think it becomes a one-way street by definition: we can't figure out which parts of your JS file, or your code, since they support other languages too, but you get the idea, are auto-generatable and which parts are human-created. There's no way to auto-generate that back into a manually created file. So it's a great point, but it really becomes a one-way street at that point.
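As a hedged sketch of what such a CDK-style layer could look like for Directus — every name here is invented, and this is exactly the one-way "synth" step described above, not a real API:

```ts
// A toy synth builder: code in, static intermediate snapshot out.
interface FieldDef {
  name: string;
  type: string;
  nullable?: boolean;
}

interface Snapshot {
  collections: Array<{ name: string; fields: FieldDef[] }>;
}

class SchemaBuilder {
  private snapshot: Snapshot = { collections: [] };

  collection(name: string, fields: FieldDef[]): this {
    this.snapshot.collections.push({ name, fields });
    return this;
  }

  // Like CDK's synth: emit the intermediate format the instance understands.
  // One-way by definition: the snapshot can't be turned back into this code.
  synth(): Snapshot {
    return this.snapshot;
  }
}

const snapshot = new SchemaBuilder()
  .collection('articles', [
    { name: 'title', type: 'string' },
    { name: 'published_at', type: 'timestamp', nullable: true },
  ])
  .synth();

console.log(JSON.stringify(snapshot, null, 2));
```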
Some of the Directus community did a sort of proof-of-concept library to do this, which is very interesting. If you want to pull up the Directus community schema builder kit repo, I just sent a link in the chat. It was very much inspired by a similar idea, where you have a JavaScript file that you use to define things. It's almost a declaration file rather than JavaScript, but it is still just JavaScript that runs from top to bottom. You could define your schema and how it's applied as individual build steps, in JavaScript. So this is where it gets really heavy on the code part of code-based configuration, and not so much just moving stuff around. In terms of big-picture stuff, I really see this as the final step of whatever these changes are that we're discussing. We'll have to start with what that new format in the middle is, how it's generated, how it's used, and then see this as a way to generate into that format and apply it automatically. But yeah, that JavaScript syntax is an interesting idea.

I see some folks typing. This is one of those very typical Directus projects where there are about 600,000 different opinions on the ideal way of doing it. We saw it in the chat immediately: shout-out to the person who said, isn't this just database templates, why bother? Which I can totally get behind, but then there are 180 or so votes on the discussion, so apparently that opinion isn't universally shared.

What's interesting is that, technically, the most basic example would be something like a database migration, at least for the configuration-as-code case. There's another CMS-like project, similar to Directus, that I checked out to see how they did it, and they handled this a little differently. They don't have a DSL-type language that defines your infrastructure. Instead, as soon as a user creates some change in the tables, the collections, however you want to call it, via the UI, the instance automatically generates a migration file locally for that specific change. And there's a mode in the instance where you can disable any ability for users to change the actual instance, so you can rely entirely on the migration files. That's an approach we could take, because a migration file can technically do anything you'd like with regard to collections, fields, whatever, even inserting items.

But because we're Directus, that's a little too easy for us, and we'd want to include some other things. Say you develop a new feature locally: a new table, a new collection, new fields. And for that to work the way you want, you need an item, a new data row, or an asset. That's the thing: assets are not inside the database. So we'd want to include assets, for example. Nothing is set in stone, but say you make some changes and you need to include some assets for those changes to even be useful. You'd have to do your changes, test them locally, include everything with the correct file name, the correct row, the asset's metadata, and then on production you'd have to replicate all of that again. So you're back to step one. That part kind of sucks with the migration approach. So even then, if we want to include all of this, we get back to the issue at hand that we were talking about.
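For reference, a per-change generated migration of the kind described above might look roughly like this; the shape and operation names are made up, and note how inserting an item fits but the asset file itself has nowhere to live:

```ts
// Hypothetical auto-generated migration file: one UI change, one file,
// with an inverse so the change can be rolled back.
type MigrationOp =
  | { op: 'createField'; collection: string; field: string; type: string }
  | { op: 'deleteField'; collection: string; field: string }
  | { op: 'insertItem'; collection: string; item: Record<string, unknown> };

interface GeneratedMigration {
  id: string; // timestamped and descriptive
  up: MigrationOp[];
  down: MigrationOp[];
}

const migration: GeneratedMigration = {
  id: '20240111T120000-add-articles-author',
  up: [
    { op: 'createField', collection: 'articles', field: 'author', type: 'uuid' },
    { op: 'insertItem', collection: 'articles', item: { title: 'Seeded row' } },
    // No equivalent for an asset's file contents: those live outside the database.
  ],
  down: [{ op: 'deleteField', collection: 'articles', field: 'author' }],
};

console.log(migration.id);
```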
We want to have a process that can export something and recreate it between instances, and so on. I just wanted to mention that for the others in the chat, because it's not just about adding a field. That's the basic case, which we could solve, and I think the Directus schema builder kit is basically that: you write some syntax, which generates some type of migration. But then you have to keep in mind the different database vendors; we have to abstract over that. For example, in SQLite, if I remember correctly, please correct me, you can't alter a table to introduce a foreign key. You're forced to drop the table and recreate it in order to add the foreign key. Other databases can do that in place. Actually, I'm pretty sure the last minor release of SQLite wasn't too long ago, and they finally have some of that ALTER TABLE support baked in. Although then you have the side effect that it depends on the native build of SQLite on your machine, which may or may not have it. But generally speaking, historically, you've been absolutely right: it's been a nightmare and a half. Lots of fun. So I just wanted to make sure people in chat know it's not just about adding a thing; it's a little more involved than that. And including assets, like the other point: maybe there's proprietary information and you're not allowed to leave it on your hard drive; maybe you want to zip it, encrypt it, compress it. There are lots and lots of different steps we'd have to kick off there.

Alright, we've got some chat interaction. Cool. Top to bottom. First question: what speaks against using JSON or YAML files instead of JavaScript? That way, changes made in the web app could also be synced back to the files easily. So, for what it's worth, the formats we're talking about generating from Directus would most likely be some sort of structured format. Not quite sure yet if that's JSON or YAML, or if we have to find some more optimized file format. Because the risk with JSON and YAML exports, once you start including data, is that we no longer know how much data you want to include. If we're treating this as something you could use for backup and restore, we could be talking about a very large amount of data, at which point we need a very optimized storage format, and JSON or YAML may not be usable for that. Connor, remind me: we found an Apache file format that could be interesting for this. Parquet, right? It was called Parquet. Yeah, that would be an interesting file format for something like that. Or potentially using a SQLite database as the exported file; that's a completely different direction, but you get the idea: we need some sort of optimized, compressed file format, because the export file could get really large. Now, it might be an option, for the way you save one of those, to just save it in a sort of raw mode,
where it isn't compressed, at which point it could just be human-readable YAML or JSON, including the ability to properly source-control it.

On the migrations note: I think you answered that exactly right before, Daniel. If you're doing auto-generated migrations, it's really only for the database schema part. We can't know on your behalf whether you consider Insights dashboards part of configuration, or Flows, or something else. It's going to be tricky, because different people have different export requirements, and if you go from dev to prod, all bets are off; you never quite know what the intent is.

Next from chat: creating internal libraries to build schemas with native access to Directus rather than through the API helps with a lot of complex, repetitive actions. I can imagine; you can write a little JavaScript for loop and, boom, you have 10,000 collections. But the lack of two-way integration with the UI does cause issues, which is the unfortunate side effect of using a programming language rather than a declarative language like YAML or JSON for schema modifications like that: you lose the two-way integration. That being said, if Directus had a "don't allow schema changes" environment variable, flag, whatever, you could do that on purpose. For a production instance, I can totally imagine disabling any schema modifications for security and availability reasons, and only allowing those changes through whatever system we're cooking up here.

"Default value filtering could help make the YAML output more manageable." Fully agree: we should only store the stuff we need to know, and storing default values feels like a waste of space. And apparently Azure is working on some sort of YAML-based metadata authoring? Very curious; I haven't heard of that one before.

"If you want to keep the GitOps thing, it should really be a text format." Good point from Tim, which may or may not be answered by Dominic here: what if you split schema and content into different formats? Maybe the configuration piece is all human-readable file formats, but for data exports there's a file size threshold. If you have a very large CSV export, maybe there's a smart cutoff: oh, you're trying to save 10,000 rows, we're going to flip this automatically into a compressed, non-readable format, so you get the best of both worlds. Potentially. Cool.
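That size-threshold idea could be as simple as the following sketch; the one-megabyte cutoff and the format labels are placeholders, not real settings:

```ts
import { gzipSync } from 'node:zlib';

// Hypothetical cutoff: below it, exports stay human-readable and diffable;
// above it, they flip into a compressed format optimized for transfer.
const READABLE_LIMIT_BYTES = 1_000_000;

function packageExport(payload: object): { format: 'json' | 'json+gzip'; body: Buffer } {
  const raw = Buffer.from(JSON.stringify(payload, null, 2), 'utf8');
  if (raw.byteLength <= READABLE_LIMIT_BYTES) {
    return { format: 'json', body: raw }; // small: keep it source-controllable
  }
  return { format: 'json+gzip', body: gzipSync(raw) }; // large: compress it
}

const small = packageExport({ collections: ['articles'] });
console.log(small.format); // 'json'
```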
Alright. Just to chime in on what you just said: yes, we'd preferably split that up, or at least require the schema side to be text-based so you can use it in version control. And for including items, a ZIP is a nice thing you could use, but then maybe the export includes items from an old version, and you want to insert something where a field doesn't exist anymore, and lots and lots of other stuff like that. And then, of course, once we have all the different pieces we want to persist, like flows, permissions, the general config as it is, you could include those in the export along with the items, so you can do both at the same time, or see if anything differs and cancel the operation. But yeah, it's a fun thing. There are so many things that could go wrong. So many.

No matter what, we want to make sure the output file is a single distributable. Because on the one hand we're saying we have to split it up into multiple files to make it efficient and easy to work with, but at the same time we want a singular file that you can send to somebody else: either through the API, so you have a single download or a single upload, or as a file, maybe packaged through, you know, the Marketplace, shout-out, wink wink, nudge nudge, or just to email to somebody, or to put in a GitHub issue as a zip. So there needs to be some sort of both. I could also imagine the API letting you download it as a plain zip that you can just double-click to open on macOS, or do whatever else it takes on other platforms to unzip it. Looking at you, Daniel; I'm sure there's a two-step process. For those out of the loop: he's the "this is the year of Linux on the desktop" evangelist within the team. Yes, this year. Here it is. Mark my words. But long story short, we're in that weird in-between where we need both the single file and the multiple files, so we'll most likely end up with some sort of zip, gzip, something like that, in between. Cool.

From the chat: Prisma migrations are an interesting way of doing things. They have a custom format, which is more concise than Directus YAML, and then some CLI tools that create actual SQL migrations and sync the environment. Great example. They basically do that with a shadow database, if I'm not mistaken; that's how they keep track of those migrations step by step. And then the CLI tool can compare your custom migration format with what they've tracked so far, and apply the diff based on that. It still has a similar one-way issue, though: the Prisma migrations you write manually, I don't think they can update those from the other side, so to speak; there's no two-way binding. Good point, though. I remember having a problem with it in the past; sadly I can't remember what it was right now, but it was very painful, so it's not a hundred percent perfect either. And there are lots of other ORMs like that now, Drizzle and a couple of others, so there are lots of sources we could yoink some code from. Be inspired, let's say; I don't think we've ever legitimately yoinked code before. Borrow. Borrow.
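Circling back to the single-distributable requirement for a second: one way to square "multiple files inside" with "one file on the wire" is a manifest plus members, zipped at the end. All of these names are invented:

```ts
// Hypothetical distributable: one archive on the outside, several logical
// members (readable schema, compressed data, assets) on the inside.
interface DistributableMember {
  path: string; // e.g. 'schema/collections.yaml' or 'data/articles.parquet'
  body: string; // simplified; real members could be binary
}

interface Distributable {
  manifest: { version: number; createdAt: string; contents: string[] };
  members: DistributableMember[];
}

function buildDistributable(members: DistributableMember[]): Distributable {
  return {
    manifest: {
      version: 1,
      createdAt: new Date().toISOString(),
      contents: members.map((m) => m.path),
    },
    members,
  };
}

const dist = buildDistributable([
  { path: 'schema/collections.yaml', body: '# human-readable schema' },
  { path: 'data/articles.parquet', body: '<binary placeholder>' },
]);
// The whole structure would then be zipped/gzipped into the single download.
console.log(dist.manifest.contents);
```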
But the main difference with those migration tools, and you touched on it perfectly before, is that they just keep track of the database schema, versus having the ability to manually template in between: moving stuff between dev and prod that is not just schema but also data, and deciding how to deal with that, which at some point needs to be something you can do manually. It doesn't have to be manual all the time, but it needs to be possible.

Cool. With all that said, and looking at the clock here: Connor, back to you. We've discussed quite a lot of the research up to this point and some of the requirements, and you and I have also been daydreaming about potential ways to implement this moving forward. Do you want to quickly touch on the different phases and the different parts we want to tackle as part of this bigger effort?

Yeah. The first thing we want to do is figure out how this is going to look: the distributable, the diffing. What is the file structure? What's the file format? What are the requirements? Do we have encryption? Multiple files, one file, whatever, like we've already discussed here. So phase 0 is defining what that looks like in totality, covering all the use cases, so that as we progress through the phases of this project we can keep that spec in mind and make sure we cover everything as we work through the different phases.

Phase 1 would be upgrading the schema service foundationally: making sure all the bug fixes for the schema service are taken care of, and adding the different strategies it might need, export filters, "only export this stuff," that kind of thing. Getting that schema service solid and adding more features to it.

The next phase is data importing: adding the features needed to make data importing work. Dry-running imported data, and importing strategies: right now everything just gets upserted; do we also want an import strategy where importing the data drops all the current data in the table and re-imports everything fresh? Adding those options. Some other ideas that have been thrown into the mix: if you're moving from one instance to another and you don't want to bring any of your IDs, your primary keys, your foreign keys, adding some way to anonymize them as they move, so the new instance makes new IDs for everything. Or, for templates, the ability to bring in dynamic data on import: if I made a project-management template a year ago with a due date on one of the items, that due date is going to be from a year ago. So being able to say, import this as a date, but set it to two hours after the import runs.
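On that dynamic-data idea, here's a rough sketch of how placeholder values in a template could be resolved at import time; the $now(...) syntax is invented purely for illustration:

```ts
// Resolve hypothetical placeholder expressions like "$now(+2h)" when a
// template item is imported, so dates stay relative to the import moment.
type ItemTemplate = Record<string, unknown>;

function resolveDynamicValues(item: ItemTemplate, now: Date = new Date()): ItemTemplate {
  const resolved: ItemTemplate = {};
  for (const [key, value] of Object.entries(item)) {
    if (typeof value === 'string' && value.startsWith('$now')) {
      const match = /^\$now\(\+(\d+)h\)$/.exec(value);
      const offsetHours = match ? Number(match[1]) : 0;
      resolved[key] = new Date(now.getTime() + offsetHours * 3_600_000).toISOString();
    } else {
      resolved[key] = value;
    }
  }
  return resolved;
}

// A due date set two hours after whenever the import actually runs:
console.log(resolveDynamicValues({ title: 'Kickoff', due_date: '$now(+2h)' }));
```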
And then the next phase after that is putting it all together. We've worked on the spec for the thing, we've worked on the schema service, we've worked on data importing; now we need to bring all those pieces together into this new, overhauled configuration system and make it work, basically. And once we've got the spec specced out, the schema service upgraded, and the data importing upgraded, and it all works foundationally, we need to figure out how to use it to implement the different use cases. For templating: what do we build inside the Data Studio admin app, from a user interface and API perspective, to make templating work? What do we do on the configuration-as-code side, using the stuff we've already built, to implement the feature set we want for that use case? Are there any other use cases, like backing up and restoring an instance; do we have the feature set we want for that? And that should, in theory, wrap up the project, once we figure out and implement those use cases. So there are quite a few steps here and quite a lot of different items, and it's going to take a long time. But at the end, we should have something pretty cool. Ideally.

Yeah, and I think Daniel's facial expressions told the whole story. No, but it makes sense to me. It sounds like a large, overwhelming amount of stuff, as it usually does, but by breaking it up step by step like this, we actually have a pretty solid idea, start to finish, of what's involved in getting this across the finish line. And as per usual, there's a lot of, what's the word, Directus magic going on under the hood to tie it all together. But I'm always very excited and glad that we're able to re-envision this as one underlying core foundational engine that can power all of those different use cases, rather than trying to tack on a new templating piece, a new code-first configuration piece, and a new something-else piece. That just increases the tech debt, makes it hard to maintain, and makes those different flavors incompatible: somebody asks, how do I do a template as code, and we have to say, well, you can't, because code is not for templating. That kind of stuff. It really reminds me of how we built Flows. We had the hook extension first, and when you configure a new flow, you're effectively just building a hook extension; it's all the exact same underlying logic and event-based system, which is also why in Flows you can do the same stuff as in a hook. Well, the same events; in a hook you can, of course, code things yourself. So I'm very glad we're making this a foundational upgrade to the schema snapshotting engine rather than yet another new thing.

Cool, cool, cool. With all that being said, I see we're at time here. Let me quickly peek at the chat to see if we missed anything. Pascal mentions that dynamic collection and field names would be cool, e.g. when importing third-party templates, so you can choose your own target names. Good point, and I think something similar goes for the conflict-resolution piece we talked about. And someone's hoping the import won't be complex. Very, very true. But rest assured, it will be complex, I'm sure.
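Pascal's target-name idea could amount to a rename map applied while importing; again, everything here is hypothetical:

```ts
// Remap template collection/field names onto the user's own target names.
interface RenameMap {
  collections: Record<string, string>;            // template name -> target name
  fields: Record<string, Record<string, string>>; // per template collection
}

function remapItem(
  collection: string,
  item: Record<string, unknown>,
  map: RenameMap,
): { collection: string; item: Record<string, unknown> } {
  const fieldMap = map.fields[collection] ?? {};
  return {
    collection: map.collections[collection] ?? collection,
    item: Object.fromEntries(
      Object.entries(item).map(([field, value]) => [fieldMap[field] ?? field, value]),
    ),
  };
}

console.log(
  remapItem(
    'tasks',
    { name: 'Demo' },
    { collections: { tasks: 'issues' }, fields: { tasks: { name: 'title' } } },
  ),
); // => { collection: 'issues', item: { title: 'Demo' } }
```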
Cool. With all that being said, this will be live on directus.io/tv/request-review in about a week's time. The last episode you can find there too. If you're watching this on Directus TV, it's probably somewhere over there, there, or down there. Is this the point where we say "like and subscribe"? No, we don't have that yet. Exactly. Thank you all for joining.
Sudan","SSD","Spain","ESP","Sudan","SDN","Sweden","SWE","Tanzania","TZA","Togo","TGO","Tunisia","TUN","Uganda","UGA","United Kingdom","GBR","Vatican City","VAT","Zambia","ZMB","Zimbabwe","ZWE","UK","Germany","Netherlands","Switzerland","CH","NL",1773850421430]